1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2000-2009, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: genuca.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created at the end of XX century
14 * created by: Vladimir Weinstein
15 *
* This program reads the Fractional UCA table and generates
17 * internal format for UCA table as well as inverse UCA table.
18 * It then writes binary files containing the data: ucadata.dat
19 * & invuca.dat
20 * Change history:
21 * 02/23/2001 grhoten Made it into a tool
22 * 02/23/2001 weiv Moved element & table handling code to i18n
23 * 05/09/2001 weiv Case bits are now in the CEs, not in front
24 */
25
26 #include "unicode/utypes.h"
27 #include "unicode/putil.h"
28 #include "unicode/udata.h"
29 #include "unicode/uclean.h"
30 #include "ucol_imp.h"
31 #include "genuca.h"
32 #include "uoptions.h"
33 #include "toolutil.h"
34 #include "unewdata.h"
35 #include "cstring.h"
36 #include "cmemory.h"
37
38 #include <stdio.h>
39
40 /*
41 * Global - verbosity
42 */
UBool VERBOSE = FALSE; /* when TRUE, functions in this file print progress/trace output to stdout */

/* UCA version; readAnElement() fills it in when it parses the "[UCA version = " directive. */
static UVersionInfo UCAVersion;
46
47 #if UCONFIG_NO_COLLATION
48
49 /* dummy UDataInfo cf. udata.h */
/*
 * Placeholder data header used when collation is compiled out
 * (UCONFIG_NO_COLLATION). The initializers are positional; the field
 * layout is declared in udata.h and is not visible in this file.
 */
static UDataInfo dummyDataInfo = {
    sizeof(UDataInfo),  /* size of this structure */
    0,                  /* presumably a reserved field - confirm against udata.h */

    U_IS_BIG_ENDIAN,    /* platform properties of the build machine */
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,                  /* presumably a reserved field - confirm against udata.h */

    { 0, 0, 0, 0 }, /* dummy dataFormat */
    { 0, 0, 0, 0 }, /* dummy formatVersion */
    { 0, 0, 0, 0 } /* dummy dataVersion */
};
63
64 #else
65
/*
 * Data header template for the UCA table file. writeOutData() copies it and
 * overwrites dataVersion with the current Unicode version before writing.
 * The initializers are positional; the field layout comes from udata.h.
 */
static const UDataInfo ucaDataInfo={
    sizeof(UDataInfo),  /* size of this structure */
    0,                  /* presumably a reserved field - confirm against udata.h */

    U_IS_BIG_ENDIAN,    /* platform properties of the build machine */
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,                  /* presumably a reserved field - confirm against udata.h */

    {UCA_DATA_FORMAT_0, UCA_DATA_FORMAT_1, UCA_DATA_FORMAT_2, UCA_DATA_FORMAT_3}, /* dataFormat="UCol" */
    /* 03/26/2002 bumped up version since format has changed */
    /* 09/16/2002 bumped up version since we went from UColAttributeValue */
    /* to int32_t in UColOptionSet */
    /* 05/13/2003 This one also updated since we added UCA and UCD versions */
    /* to header */
    /* 09/11/2003 Adding information required by data swapper */
    {UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1, UCA_FORMAT_VERSION_2, UCA_FORMAT_VERSION_3}, /* formatVersion */
    {0, 0, 0, 0} /* dataVersion = Unicode Version; filled in at write time */
};
85
/*
 * Data header template for the inverse UCA table file. writeOutInverseData()
 * copies it and overwrites dataVersion with the current Unicode version
 * before writing. The initializers are positional; the field layout comes
 * from udata.h.
 */
static const UDataInfo invUcaDataInfo={
    sizeof(UDataInfo),  /* size of this structure */
    0,                  /* presumably a reserved field - confirm against udata.h */

    U_IS_BIG_ENDIAN,    /* platform properties of the build machine */
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,                  /* presumably a reserved field - confirm against udata.h */

    {INVUCA_DATA_FORMAT_0, INVUCA_DATA_FORMAT_1, INVUCA_DATA_FORMAT_2, INVUCA_DATA_FORMAT_3}, /* dataFormat="InvC" */
    /* 03/26/2002 bumped up version since format has changed */
    /* 04/29/2003 2.1 format - we have added UCA version to header */
    {INVUCA_FORMAT_VERSION_0, INVUCA_FORMAT_VERSION_1, INVUCA_FORMAT_VERSION_2, INVUCA_FORMAT_VERSION_3}, /* formatVersion */
    {0, 0, 0, 0} /* dataVersion = Unicode Version; filled in at write time */
};
101
/*
 * Single shared element object. NOTE(review): readAnElement() appears to
 * return a pointer to this object rather than allocating a fresh element
 * (the malloc there is commented out), so each parsed line would overwrite
 * the previous one - confirm before holding on to a returned pointer.
 */
UCAElements le;
103
readElement(char ** from,char * to,char separator,UErrorCode * status)104 int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
105 if(U_FAILURE(*status)) {
106 return 0;
107 }
108 char buffer[1024];
109 int32_t i = 0;
110 while(**from != separator) {
111 if(**from != ' ') {
112 *(buffer+i++) = **from;
113 }
114 (*from)++;
115 }
116 (*from)++;
117 *(buffer + i) = 0;
118 //*to = (char *)malloc(strlen(buffer)+1);
119 strcpy(to, buffer);
120 return i/2;
121 }
122
123
getSingleCEValue(char * primary,char * secondary,char * tertiary,UErrorCode * status)124 uint32_t getSingleCEValue(char *primary, char *secondary, char *tertiary, UErrorCode *status) {
125 if(U_FAILURE(*status)) {
126 return 0;
127 }
128 uint32_t value = 0;
129 char primsave = '\0';
130 char secsave = '\0';
131 char tersave = '\0';
132 char *primend = primary+4;
133 if(strlen(primary) > 4) {
134 primsave = *primend;
135 *primend = '\0';
136 }
137 char *secend = secondary+2;
138 if(strlen(secondary) > 2) {
139 secsave = *secend;
140 *secend = '\0';
141 }
142 char *terend = tertiary+2;
143 if(strlen(tertiary) > 2) {
144 tersave = *terend;
145 *terend = '\0';
146 }
147 uint32_t primvalue = (uint32_t)((*primary!='\0')?strtoul(primary, &primend, 16):0);
148 uint32_t secvalue = (uint32_t)((*secondary!='\0')?strtoul(secondary, &secend, 16):0);
149 uint32_t tervalue = (uint32_t)((*tertiary!='\0')?strtoul(tertiary, &terend, 16):0);
150 if(primvalue <= 0xFF) {
151 primvalue <<= 8;
152 }
153
154 value = ((primvalue<<UCOL_PRIMARYORDERSHIFT)&UCOL_PRIMARYORDERMASK)|
155 ((secvalue<<UCOL_SECONDARYORDERSHIFT)&UCOL_SECONDARYORDERMASK)|
156 (tervalue&UCOL_TERTIARYORDERMASK);
157
158 if(primsave!='\0') {
159 *primend = primsave;
160 }
161 if(secsave!='\0') {
162 *secend = secsave;
163 }
164 if(tersave!='\0') {
165 *terend = tersave;
166 }
167 return value;
168 }
169
/*
 * Inverse UCA table under construction. Each row holds three values:
 * [0] the first CE, [1] its continuation CE or 0, and [2] either a single
 * code unit or a size/offset reference into stringContinue.
 * inversePos is the index of the last row in use; row 0 is zeroed by
 * addToInverse() before the first real row is added.
 */
static uint32_t inverseTable[0xFFFF][3];
static uint32_t inversePos = 0;
/* Storage for code unit strings referenced from inverseTable[..][2]; sContPos is the next free index. */
static UChar stringContinue[0xFFFF];
static uint32_t sContPos = 0;
174
addNewInverse(UCAElements * element,UErrorCode * status)175 static void addNewInverse(UCAElements *element, UErrorCode *status) {
176 if(U_FAILURE(*status)) {
177 return;
178 }
179 if(VERBOSE && isContinuation(element->CEs[1])) {
180 //fprintf(stdout, "+");
181 }
182 inversePos++;
183 inverseTable[inversePos][0] = element->CEs[0];
184 if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
185 inverseTable[inversePos][1] = element->CEs[1];
186 } else {
187 inverseTable[inversePos][1] = 0;
188 }
189 if(element->cSize < 2) {
190 inverseTable[inversePos][2] = element->cPoints[0];
191 } else { /* add a new store of cruft */
192 inverseTable[inversePos][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
193 memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
194 sContPos += element->cSize+1;
195 }
196 }
197
insertInverse(UCAElements * element,uint32_t position,UErrorCode * status)198 static void insertInverse(UCAElements *element, uint32_t position, UErrorCode *status) {
199 if(U_FAILURE(*status)) {
200 return;
201 }
202
203 if(VERBOSE && isContinuation(element->CEs[1])) {
204 //fprintf(stdout, "+");
205 }
206 if(position <= inversePos) {
207 /*move stuff around */
208 uint32_t amountToMove = (inversePos - position+1)*sizeof(inverseTable[0]);
209 uprv_memmove(inverseTable[position+1], inverseTable[position], amountToMove);
210 }
211 inverseTable[position][0] = element->CEs[0];
212 if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
213 inverseTable[position][1] = element->CEs[1];
214 } else {
215 inverseTable[position][1] = 0;
216 }
217 if(element->cSize < 2) {
218 inverseTable[position][2] = element->cPoints[0];
219 } else { /* add a new store of cruft */
220 inverseTable[position][2] = ((element->cSize+1) << UCOL_INV_SHIFTVALUE) | sContPos;
221 memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
222 sContPos += element->cSize+1;
223 }
224 inversePos++;
225 }
226
/*
 * Adds the code units of `element` to row `position` of the inverse table,
 * for an element whose CEs equal the ones already stored in that row.
 *
 * If the row so far holds a single code unit, a new run is started in
 * stringContinue: old code unit, 0xFFFF, the new code units, 0xFFFE.
 * Otherwise the new code units are added at the end of the row's existing
 * run: its last slot becomes 0xFFFF, followed by the new code units and
 * 0xFFFE. Row value [2] is updated with the new size and the offset.
 *
 * Does nothing if *status already indicates failure.
 */
static void addToExistingInverse(UCAElements *element, uint32_t position, UErrorCode *status) {

    if(U_FAILURE(*status)) {
        return;
    }

    if((inverseTable[position][2] & UCOL_INV_SIZEMASK) == 0) { /* single element, have to make new extension place and put both guys there */
        stringContinue[sContPos] = (UChar)inverseTable[position][2];
        /* run length: old unit + 0xFFFF + new units + 0xFFFE */
        inverseTable[position][2] = ((element->cSize+3) << UCOL_INV_SHIFTVALUE) | sContPos;
        sContPos++;
        stringContinue[sContPos++] = 0xFFFF;
        memcpy(stringContinue+sContPos, element->cPoints, element->cSize*sizeof(UChar));
        sContPos += element->cSize;
        stringContinue[sContPos++] = 0xFFFE;
    } else { /* adding to the already existing continuing table */
        uint32_t contIndex = inverseTable[position][2] & UCOL_INV_OFFSETMASK;
        uint32_t contSize = (inverseTable[position][2] & UCOL_INV_SIZEMASK) >> UCOL_INV_SHIFTVALUE;

        if(contIndex+contSize < sContPos) {
            /*
             * The run is not the last one in stringContinue, so data behind it
             * is copied forward to make room.
             * NOTE(review): only cSize+1 units are copied, regardless of how
             * much data follows the run, and the offsets stored in other rows
             * are not adjusted here - confirm this is sufficient.
             */
            /*fprintf(stderr, ".", sContPos, contIndex+contSize);*/
            memcpy(stringContinue+contIndex+contSize+element->cSize+1, stringContinue+contIndex+contSize, (element->cSize+1)*sizeof(UChar));
        }

        /* the last slot of the old run becomes the separator before the new units */
        stringContinue[contIndex+contSize-1] = 0xFFFF;
        memcpy(stringContinue+contIndex+contSize, element->cPoints, element->cSize*sizeof(UChar));
        sContPos += element->cSize+1;
        stringContinue[contIndex+contSize+element->cSize] = 0xFFFE;

        inverseTable[position][2] = ((contSize+element->cSize+1) << UCOL_INV_SHIFTVALUE) | contIndex;
    }
}
258
259 /*
260 * Takes two CEs (lead and continuation) and
261 * compares them as CEs should be compared:
262 * primary vs. primary, secondary vs. secondary
263 * tertiary vs. tertiary
264 */
compareCEs(uint32_t * source,uint32_t * target)265 static int32_t compareCEs(uint32_t *source, uint32_t *target) {
266 uint32_t s1 = source[0], s2, t1 = target[0], t2;
267 if(isContinuation(source[1])) {
268 s2 = source[1];
269 } else {
270 s2 = 0;
271 }
272 if(isContinuation(target[1])) {
273 t2 = target[1];
274 } else {
275 t2 = 0;
276 }
277
278 uint32_t s = 0, t = 0;
279 if(s1 == t1 && s2 == t2) {
280 return 0;
281 }
282 s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
283 t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
284 if(s < t) {
285 return -1;
286 } else if(s > t) {
287 return 1;
288 } else {
289 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
290 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
291 if(s < t) {
292 return -1;
293 } else if(s > t) {
294 return 1;
295 } else {
296 s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
297 t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
298 if(s < t) {
299 return -1;
300 } else {
301 return 1;
302 }
303 }
304 }
305 }
306
addToInverse(UCAElements * element,UErrorCode * status)307 static uint32_t addToInverse(UCAElements *element, UErrorCode *status) {
308 uint32_t position = inversePos;
309 uint32_t saveElement = element->CEs[0];
310 int32_t compResult = 0;
311 element->CEs[0] &= 0xFFFFFF3F;
312 if(element->noOfCEs == 1) {
313 element->CEs[1] = 0;
314 }
315 if(inversePos == 0) {
316 inverseTable[0][0] = inverseTable[0][1] = inverseTable[0][2] = 0;
317 addNewInverse(element, status);
318 } else if(compareCEs(inverseTable[inversePos], element->CEs) > 0) {
319 while((compResult = compareCEs(inverseTable[--position], element->CEs)) > 0);
320 if(VERBOSE) { fprintf(stdout, "p:%u ", (int)position); }
321 if(compResult == 0) {
322 addToExistingInverse(element, position, status);
323 } else {
324 insertInverse(element, position+1, status);
325 }
326 } else if(compareCEs(inverseTable[inversePos], element->CEs) == 0) {
327 addToExistingInverse(element, inversePos, status);
328 } else {
329 addNewInverse(element, status);
330 }
331 element->CEs[0] = saveElement;
332 if(VERBOSE) { fprintf(stdout, "+"); }
333 return inversePos;
334 }
335
assembleInverseTable(UErrorCode * status)336 static InverseUCATableHeader *assembleInverseTable(UErrorCode *status)
337 {
338 InverseUCATableHeader *result = NULL;
339 uint32_t headerByteSize = paddedsize(sizeof(InverseUCATableHeader));
340 uint32_t inverseTableByteSize = (inversePos+2)*sizeof(uint32_t)*3;
341 uint32_t contsByteSize = sContPos * sizeof(UChar);
342 uint32_t i = 0;
343
344 result = (InverseUCATableHeader *)uprv_malloc(headerByteSize + inverseTableByteSize + contsByteSize);
345 uprv_memset(result, 0, headerByteSize + inverseTableByteSize + contsByteSize);
346 if(result != NULL) {
347 result->byteSize = headerByteSize + inverseTableByteSize + contsByteSize;
348
349 inversePos++;
350 inverseTable[inversePos][0] = 0xFFFFFFFF;
351 inverseTable[inversePos][1] = 0xFFFFFFFF;
352 inverseTable[inversePos][2] = 0x0000FFFF;
353 inversePos++;
354
355 for(i = 2; i<inversePos; i++) {
356 if(compareCEs(inverseTable[i-1], inverseTable[i]) > 0) {
357 fprintf(stderr, "Error at %i: %08X & %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i][0]);
358 } else if(inverseTable[i-1][0] == inverseTable[i][0] && !(inverseTable[i-1][1] < inverseTable[i][1])) {
359 fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i-1][1], (int)inverseTable[i][0], (int)inverseTable[i][1]);
360 }
361 }
362
363 result->tableSize = inversePos;
364 result->contsSize = sContPos;
365
366 result->table = headerByteSize;
367 result->conts = headerByteSize + inverseTableByteSize;
368
369 memcpy((uint8_t *)result + result->table, inverseTable, inverseTableByteSize);
370 memcpy((uint8_t *)result + result->conts, stringContinue, contsByteSize);
371
372 } else {
373 *status = U_MEMORY_ALLOCATION_ERROR;
374 return NULL;
375 }
376
377 return result;
378 }
379
380
writeOutInverseData(InverseUCATableHeader * data,const char * outputDir,const char * copyright,UErrorCode * status)381 static void writeOutInverseData(InverseUCATableHeader *data,
382 const char *outputDir,
383 const char *copyright,
384 UErrorCode *status)
385 {
386 UNewDataMemory *pData;
387
388 long dataLength;
389
390 UDataInfo invUcaInfo;
391 uprv_memcpy(&invUcaInfo, &invUcaDataInfo, sizeof(UDataInfo));
392 u_getUnicodeVersion(invUcaInfo.dataVersion);
393
394 pData=udata_create(outputDir, INVC_DATA_TYPE, INVC_DATA_NAME, &invUcaInfo,
395 copyright, status);
396
397 if(U_FAILURE(*status)) {
398 fprintf(stderr, "Error: unable to create %s"INVC_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
399 return;
400 }
401
402 /* write the data to the file */
403 if (VERBOSE) {
404 fprintf(stdout, "Writing out inverse UCA table: %s%c%s.%s\n", outputDir, U_FILE_SEP_CHAR,
405 INVC_DATA_NAME,
406 INVC_DATA_TYPE);
407 }
408 udata_writeBlock(pData, data, data->byteSize);
409
410 /* finish up */
411 dataLength=udata_finish(pData, status);
412 if(U_FAILURE(*status)) {
413 fprintf(stderr, "Error: error %d writing the output file\n", *status);
414 return;
415 }
416 }
417
418
419
/*
 * Converts one hexadecimal digit character (0-9, a-f, A-F) to its value.
 * Any other character yields 0.
 */
static int32_t hex2num(char hex) {
    int32_t digit = 0;

    if(hex >= 'A' && hex <= 'F') {
        digit = 10 + (hex - 'A');
    } else if(hex >= 'a' && hex <= 'f') {
        digit = 10 + (hex - 'a');
    } else if(hex >= '0' && hex <= '9') {
        digit = hex - '0';
    }
    return digit;
}
431
readAnElement(FILE * data,tempUCATable * t,UCAConstants * consts,UErrorCode * status)432 UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, UErrorCode *status) {
433 char buffer[2048], primary[100], secondary[100], tertiary[100];
434 UBool detectedContraction;
435 int32_t i = 0;
436 unsigned int theValue;
437 char *pointer = NULL;
438 char *commentStart = NULL;
439 char *startCodePoint = NULL;
440 char *endCodePoint = NULL;
441 char *spacePointer = NULL;
442 char *dashPointer = NULL;
443 char *result = fgets(buffer, 2048, data);
444 int32_t buflen = (int32_t)uprv_strlen(buffer);
445 if(U_FAILURE(*status)) {
446 return 0;
447 }
448 *primary = *secondary = *tertiary = '\0';
449 if(result == NULL) {
450 if(feof(data)) {
451 return NULL;
452 } else {
453 fprintf(stderr, "empty line but no EOF!\n");
454 *status = U_INVALID_FORMAT_ERROR;
455 return NULL;
456 }
457 }
458 while(buflen>0 && (buffer[buflen-1] == '\r' || buffer[buflen-1] == '\n')) {
459 buffer[--buflen] = 0;
460 }
461
462 if(buffer[0] == 0 || buffer[0] == '#') {
463 return NULL; // just a comment, skip whole line
464 }
465
466 UCAElements *element = ≤ //(UCAElements *)malloc(sizeof(UCAElements));
467
468 enum ActionType {
469 READCE,
470 READHEX,
471 READUCAVERSION
472 };
473
474 // Directives.
475 if(buffer[0] == '[') {
476 uint32_t cnt = 0;
477 static const struct {
478 char name[128];
479 uint32_t *what;
480 ActionType what_to_do;
481 } vt[] = { {"[first tertiary ignorable", consts->UCA_FIRST_TERTIARY_IGNORABLE, READCE},
482 {"[last tertiary ignorable", consts->UCA_LAST_TERTIARY_IGNORABLE, READCE},
483 {"[first secondary ignorable", consts->UCA_FIRST_SECONDARY_IGNORABLE, READCE},
484 {"[last secondary ignorable", consts->UCA_LAST_SECONDARY_IGNORABLE, READCE},
485 {"[first primary ignorable", consts->UCA_FIRST_PRIMARY_IGNORABLE, READCE},
486 {"[last primary ignorable", consts->UCA_LAST_PRIMARY_IGNORABLE, READCE},
487 {"[first variable", consts->UCA_FIRST_VARIABLE, READCE},
488 {"[last variable", consts->UCA_LAST_VARIABLE, READCE},
489 {"[first regular", consts->UCA_FIRST_NON_VARIABLE, READCE},
490 {"[last regular", consts->UCA_LAST_NON_VARIABLE, READCE},
491 {"[first implicit", consts->UCA_FIRST_IMPLICIT, READCE},
492 {"[last implicit", consts->UCA_LAST_IMPLICIT, READCE},
493 {"[first trailing", consts->UCA_FIRST_TRAILING, READCE},
494 {"[last trailing", consts->UCA_LAST_TRAILING, READCE},
495
496 {"[fixed top", &consts->UCA_PRIMARY_TOP_MIN, READHEX},
497 {"[fixed first implicit byte", &consts->UCA_PRIMARY_IMPLICIT_MIN, READHEX},
498 {"[fixed last implicit byte", &consts->UCA_PRIMARY_IMPLICIT_MAX, READHEX},
499 {"[fixed first trail byte", &consts->UCA_PRIMARY_TRAILING_MIN, READHEX},
500 {"[fixed last trail byte", &consts->UCA_PRIMARY_TRAILING_MAX, READHEX},
501 {"[fixed first special byte", &consts->UCA_PRIMARY_SPECIAL_MIN, READHEX},
502 {"[fixed last special byte", &consts->UCA_PRIMARY_SPECIAL_MAX, READHEX},
503 {"[variable top = ", &t->options->variableTopValue, READHEX},
504 {"[UCA version = ", NULL, READUCAVERSION}
505 };
506 for (cnt = 0; cnt<sizeof(vt)/sizeof(vt[0]); cnt++) {
507 uint32_t vtLen = (uint32_t)uprv_strlen(vt[cnt].name);
508 if(uprv_strncmp(buffer, vt[cnt].name, vtLen) == 0) {
509 element->variableTop = TRUE;
510 if(vt[cnt].what_to_do == READHEX) {
511 if(sscanf(buffer+vtLen, "%4x", &theValue) != 1) /* read first code point */
512 {
513 fprintf(stderr, " scanf(hex) failed on !\n ");
514 }
515 *(vt[cnt].what) = (UChar)theValue;
516 //if(cnt == 1) { // first implicit
517 // we need to set the value for top next
518 //uint32_t nextTop = ucol_prv_calculateImplicitPrimary(0x4E00); // CJK base
519 //consts->UCA_NEXT_TOP_VALUE = theValue<<24 | 0x030303;
520 //}
521 } else if (vt[cnt].what_to_do == READCE) { /* vt[cnt].what_to_do == READCE */
522 // TODO: combine & clean up the two CE parsers
523 pointer = strchr(buffer+vtLen, '[');
524 if(pointer) {
525 pointer++;
526 element->sizePrim[0]=readElement(&pointer, primary, ',', status);
527 element->sizeSec[0]=readElement(&pointer, secondary, ',', status);
528 element->sizeTer[0]=readElement(&pointer, tertiary, ']', status);
529
530 vt[cnt].what[0] = getSingleCEValue(primary, secondary, tertiary, status);
531 if(element->sizePrim[0] > 2 || element->sizeSec[0] > 1 || element->sizeTer[0] > 1) {
532 uint32_t CEi = 1;
533 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
534 if(2*CEi<element->sizePrim[i]) {
535 value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
536 value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
537 }
538
539 if(2*CEi+1<element->sizePrim[i]) {
540 value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
541 value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
542 }
543
544 if(CEi<element->sizeSec[i]) {
545 value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
546 value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
547 }
548
549 if(CEi<element->sizeTer[i]) {
550 value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
551 value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
552 }
553
554 CEi++;
555
556 vt[cnt].what[1] = value;
557 //element->CEs[CEindex++] = value;
558 } else {
559 vt[cnt].what[1] = 0;
560 }
561 } else {
562 fprintf(stderr, "Failed to read a CE from line %s\n", buffer);
563 }
564 } else { //vt[cnt].what_to_do == READUCAVERSION
565 u_versionFromString(UCAVersion, buffer+vtLen);
566 if(VERBOSE) {
567 fprintf(stdout, "UCA version [%hu.%hu.%hu.%hu]\n", UCAVersion[0], UCAVersion[1], UCAVersion[2], UCAVersion[3]);
568 }
569 }
570 //element->cPoints[0] = (UChar)theValue;
571 //return element;
572 return NULL;
573 }
574 }
575 fprintf(stderr, "Warning: unrecognized option: %s\n", buffer);
576 //*status = U_INVALID_FORMAT_ERROR;
577 return NULL;
578 }
579 element->variableTop = FALSE;
580
581 startCodePoint = buffer;
582 endCodePoint = strchr(startCodePoint, ';');
583
584 if(endCodePoint == 0) {
585 fprintf(stderr, "error - line with no code point!\n");
586 *status = U_INVALID_FORMAT_ERROR; /* No code point - could be an error, but probably only an empty line */
587 return NULL;
588 } else {
589 *(endCodePoint) = 0;
590 }
591
592 memset(element, 0, sizeof(*element));
593
594 element->cPoints = element->uchars;
595
596 spacePointer = strchr(buffer, ' ');
597 if(sscanf(buffer, "%4x", &theValue) != 1) /* read first code point */
598 {
599 fprintf(stderr, " scanf(hex) failed!\n ");
600 }
601 element->cPoints[0] = (UChar)theValue;
602
603 if(spacePointer == 0) {
604 detectedContraction = FALSE;
605 element->cSize = 1;
606 } else {
607 dashPointer = strchr(buffer, '|');
608 if (dashPointer != NULL) {
609 // prefix characters
610 element->prefixChars[0] = (UChar)theValue;
611 element->prefixSize = 1;
612 element->prefix = element->prefixChars;
613 sscanf(dashPointer+1, "%4x", &theValue);
614 element->cPoints[0] = (UChar)theValue;
615 element->cSize = 1;
616 }
617 else {
618 // Contractions or surrogate characters.
619 i = 1;
620 detectedContraction = TRUE;
621 while(spacePointer != NULL) {
622 sscanf(spacePointer+1, "%4x", &theValue);
623 element->cPoints[i++] = (UChar)theValue;
624 spacePointer = strchr(spacePointer+1, ' ');
625 }
626 element->cSize = i;
627 }
628
629
630 //fprintf(stderr, "Number of codepoints in contraction: %i\n", i);
631 }
632
633 startCodePoint = endCodePoint+1;
634
635 commentStart = strchr(startCodePoint, '#');
636 if(commentStart == NULL) {
637 commentStart = strlen(startCodePoint) + startCodePoint;
638 }
639
640 i = 0;
641 uint32_t CEindex = 0;
642 element->noOfCEs = 0;
643 for(;;) {
644 endCodePoint = strchr(startCodePoint, ']');
645 if(endCodePoint == NULL || endCodePoint >= commentStart) {
646 break;
647 }
648 pointer = strchr(startCodePoint, '[');
649 pointer++;
650
651 element->sizePrim[i]=readElement(&pointer, primary, ',', status);
652 element->sizeSec[i]=readElement(&pointer, secondary, ',', status);
653 element->sizeTer[i]=readElement(&pointer, tertiary, ']', status);
654
655
656 /* I want to get the CEs entered right here, including continuation */
657 element->CEs[CEindex++] = getSingleCEValue(primary, secondary, tertiary, status);
658
659 uint32_t CEi = 1;
660 while(2*CEi<element->sizePrim[i] || CEi<element->sizeSec[i] || CEi<element->sizeTer[i]) {
661 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
662 if(2*CEi<element->sizePrim[i]) {
663 value |= ((hex2num(*(primary+4*CEi))&0xF)<<28);
664 value |= ((hex2num(*(primary+4*CEi+1))&0xF)<<24);
665 }
666
667 if(2*CEi+1<element->sizePrim[i]) {
668 value |= ((hex2num(*(primary+4*CEi+2))&0xF)<<20);
669 value |= ((hex2num(*(primary+4*CEi+3))&0xF)<<16);
670 }
671
672 if(CEi<element->sizeSec[i]) {
673 value |= ((hex2num(*(secondary+2*CEi))&0xF)<<12);
674 value |= ((hex2num(*(secondary+2*CEi+1))&0xF)<<8);
675 }
676
677 if(CEi<element->sizeTer[i]) {
678 value |= ((hex2num(*(tertiary+2*CEi))&0x3)<<4);
679 value |= (hex2num(*(tertiary+2*CEi+1))&0xF);
680 }
681
682 CEi++;
683
684 element->CEs[CEindex++] = value;
685 }
686
687 startCodePoint = endCodePoint+1;
688 i++;
689 }
690 element->noOfCEs = CEindex;
691 #if 0
692 element->isThai = UCOL_ISTHAIPREVOWEL(element->cPoints[0]);
693 #endif
694 // we don't want any strange stuff after useful data!
695 if (pointer == NULL) {
696 /* huh? Did we get ']' without the '['? Pair your brackets! */
697 *status=U_INVALID_FORMAT_ERROR;
698 }
699 else {
700 while(pointer < commentStart) {
701 if(*pointer != ' ' && *pointer != '\t')
702 {
703 *status=U_INVALID_FORMAT_ERROR;
704 break;
705 }
706 pointer++;
707 }
708 }
709 // Check for valid bytes in CE weights.
710 // TODO: Tighten this so that it allows 03 & 04 in intermediate bytes
711 // but not in final bytes.
712 // See http://bugs.icu-project.org/trac/ticket/7167
713 for (i = 0; i < (int32_t)CEindex; ++i) {
714 uint32_t value = element->CEs[i];
715 uint8_t bytes[4] = {
716 (uint8_t)(value >> 24),
717 (uint8_t)(value >> 16),
718 (uint8_t)(value >> 8),
719 (uint8_t)(value & UCOL_NEW_TERTIARYORDERMASK)
720 };
721 for (int j = 0; j < 4; ++j) {
722 uint8_t maxByte =
723 (isContinuation(value) || j == 1) ?
724 UCOL_BYTE_FIRST_TAILORED :
725 UCOL_BYTE_COMMON;
726 if (0 != bytes[j] && bytes[j] < maxByte) {
727 fprintf(stderr, "Warning: invalid UCA weight byte %02X for %s\n", bytes[j], buffer);
728 // TODO: return NULL;
729 }
730 }
731 }
732
733 if(U_FAILURE(*status)) {
734 fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status));
735 *status = U_INTERNAL_PROGRAM_ERROR;
736 return NULL;
737 }
738
739 return element;
740 }
741
742
writeOutData(UCATableHeader * data,UCAConstants * consts,UChar contractions[][3],uint32_t noOfcontractions,const char * outputDir,const char * copyright,UErrorCode * status)743 void writeOutData(UCATableHeader *data,
744 UCAConstants *consts,
745 UChar contractions[][3],
746 uint32_t noOfcontractions,
747 const char *outputDir,
748 const char *copyright,
749 UErrorCode *status)
750 {
751 if(U_FAILURE(*status)) {
752 return;
753 }
754
755 uint32_t size = data->size;
756
757 data->UCAConsts = data->size;
758 data->size += paddedsize(sizeof(UCAConstants));
759
760 if(noOfcontractions != 0) {
761 contractions[noOfcontractions][0] = 0;
762 contractions[noOfcontractions][1] = 0;
763 contractions[noOfcontractions][2] = 0;
764 noOfcontractions++;
765
766
767 data->contractionUCACombos = data->size;
768 data->contractionUCACombosWidth = 3;
769 data->contractionUCACombosSize = noOfcontractions;
770 data->size += paddedsize((noOfcontractions*3*sizeof(UChar)));
771 }
772
773 UNewDataMemory *pData;
774
775 long dataLength;
776 UDataInfo ucaInfo;
777 uprv_memcpy(&ucaInfo, &ucaDataInfo, sizeof(UDataInfo));
778 u_getUnicodeVersion(ucaInfo.dataVersion);
779
780 pData=udata_create(outputDir, UCA_DATA_TYPE, UCA_DATA_NAME, &ucaInfo,
781 copyright, status);
782
783 if(U_FAILURE(*status)) {
784 fprintf(stderr, "Error: unable to create %s"UCA_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
785 return;
786 }
787
788 /* write the data to the file */
789 if (VERBOSE) {
790 fprintf(stdout, "Writing out UCA table: %s%c%s.%s\n", outputDir,
791 U_FILE_SEP_CHAR,
792 U_ICUDATA_NAME "_" UCA_DATA_NAME,
793 UCA_DATA_TYPE);
794 }
795 udata_writeBlock(pData, data, size);
796
797 // output the constants here
798 udata_writeBlock(pData, consts, sizeof(UCAConstants));
799
800 if(noOfcontractions != 0) {
801 udata_writeBlock(pData, contractions, noOfcontractions*3*sizeof(UChar));
802 udata_writePadding(pData, paddedsize((noOfcontractions*3*sizeof(UChar))) - noOfcontractions*3*sizeof(uint16_t));
803 }
804
805 /* finish up */
806 dataLength=udata_finish(pData, status);
807 if(U_FAILURE(*status)) {
808 fprintf(stderr, "Error: error %d writing the output file\n", *status);
809 return;
810 }
811 }
812
enum {
    /*
     * Maximum number of UCA contractions we can store.
     * May need to be increased for a new Unicode version.
     * It is the number of rows of the contractionCEs array in
     * write_uca_table(), which exits with a message when the
     * input contains more contractions than this.
     */
    MAX_UCA_CONTRACTION_CES=2048
};
820
821 static int32_t
write_uca_table(const char * filename,const char * outputDir,const char * copyright,UErrorCode * status)822 write_uca_table(const char *filename,
823 const char *outputDir,
824 const char *copyright,
825 UErrorCode *status)
826 {
827 FILE *data = fopen(filename, "r");
828 if(data == NULL) {
829 fprintf(stderr, "Couldn't open file: %s\n", filename);
830 return -1;
831 }
832 uint32_t line = 0;
833 UCAElements *element = NULL;
834 UChar variableTopValue = 0;
835 UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
836 /* test for NULL */
837 if(myD == NULL) {
838 *status = U_MEMORY_ALLOCATION_ERROR;
839 fclose(data);
840 return 0;
841 }
842 uprv_memset(myD, 0, sizeof(UCATableHeader));
843 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
844 /* test for NULL */
845 if(opts == NULL) {
846 *status = U_MEMORY_ALLOCATION_ERROR;
847 uprv_free(myD);
848 fclose(data);
849 return 0;
850 }
851 uprv_memset(opts, 0, sizeof(UColOptionSet));
852 UChar contractionCEs[MAX_UCA_CONTRACTION_CES][3];
853 uprv_memset(contractionCEs, 0, sizeof(contractionCEs));
854 uint32_t noOfContractions = 0;
855 UCAConstants consts;
856 uprv_memset(&consts, 0, sizeof(consts));
857 #if 0
858 UCAConstants consts = {
859 UCOL_RESET_TOP_VALUE,
860 UCOL_FIRST_PRIMARY_IGNORABLE,
861 UCOL_LAST_PRIMARY_IGNORABLE,
862 UCOL_LAST_PRIMARY_IGNORABLE_CONT,
863 UCOL_FIRST_SECONDARY_IGNORABLE,
864 UCOL_LAST_SECONDARY_IGNORABLE,
865 UCOL_FIRST_TERTIARY_IGNORABLE,
866 UCOL_LAST_TERTIARY_IGNORABLE,
867 UCOL_FIRST_VARIABLE,
868 UCOL_LAST_VARIABLE,
869 UCOL_FIRST_NON_VARIABLE,
870 UCOL_LAST_NON_VARIABLE,
871
872 UCOL_NEXT_TOP_VALUE,
873 /*
874 UCOL_NEXT_FIRST_PRIMARY_IGNORABLE,
875 UCOL_NEXT_LAST_PRIMARY_IGNORABLE,
876 UCOL_NEXT_FIRST_SECONDARY_IGNORABLE,
877 UCOL_NEXT_LAST_SECONDARY_IGNORABLE,
878 UCOL_NEXT_FIRST_TERTIARY_IGNORABLE,
879 UCOL_NEXT_LAST_TERTIARY_IGNORABLE,
880 UCOL_NEXT_FIRST_VARIABLE,
881 UCOL_NEXT_LAST_VARIABLE,
882 */
883
884 PRIMARY_IMPLICIT_MIN,
885 PRIMARY_IMPLICIT_MAX
886 };
887 #endif
888
889
890 uprv_memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);
891
892 opts->variableTopValue = variableTopValue;
893 opts->strength = UCOL_TERTIARY;
894 opts->frenchCollation = UCOL_OFF;
895 opts->alternateHandling = UCOL_NON_IGNORABLE; /* attribute for handling variable elements*/
896 opts->caseFirst = UCOL_OFF; /* who goes first, lower case or uppercase */
897 opts->caseLevel = UCOL_OFF; /* do we have an extra case level */
898 opts->normalizationMode = UCOL_OFF; /* attribute for normalization */
899 opts->hiraganaQ = UCOL_OFF; /* attribute for JIS X 4061, used only in Japanese */
900 opts->numericCollation = UCOL_OFF;
901 myD->jamoSpecial = FALSE;
902
903 tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, LEAD_SURROGATE_TAG, status);
904 if(U_FAILURE(*status))
905 {
906 fprintf(stderr, "Failed to init UCA temp table: %s\n", u_errorName(*status));
907 uprv_free(opts);
908 uprv_free(myD);
909 fclose(data);
910 return -1;
911 }
912
913 #if 0
914 IMPLICIT_TAG = 9,
915 /*
916 *****************************************************************************************
917 * NON_CHARACTER FDD0 - FDEF, FFFE, FFFF, 1FFFE, 1FFFF, 2FFFE, 2FFFF,...e.g. **FFFE, **FFFF
918 ******************************************************************************************
919 */
920 #endif
921
922 // * set to zero
923 struct {
924 UChar32 start;
925 UChar32 end;
926 int32_t value;
927 } ranges[] =
928 {
929 #if 0
930 {0xAC00, 0xD7AF, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
931 {0xD800, 0xDBFF, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/
932 {0xDC00, 0xDFFF, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF
933 {0x3400, 0x4DB5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/
934 {0x4E00, 0x9FA5, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/
935 {0xF900, 0xFA2D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/
936 {0x20000, 0x2A6D6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //6 CJK_IMPLICIT_TAG, /* 0x20000-0x2A6D6*/
937 {0x2F800, 0x2FA1D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/
938 #endif
939 {0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
940 //{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/
941 {0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF
942 // Now directly handled in the collation code by the swapCJK function.
943 //{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/
944 //{0x4E00, 0x9FA6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //4 CJK_IMPLICIT_TAG, /* 0x4E00-0x9FA5*/
945 //{0xF900, 0xFA2E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //5 CJK_IMPLICIT_TAG, /* 0xF900-0xFA2D*/
946 //{0x20000, 0x2A6D7, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //6 CJK_IMPLICIT_TAG, /* 0x20000-0x2A6D6*/
947 //{0x2F800, 0x2FA1E, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/
948 };
949 uint32_t i = 0;
950
951 for(i = 0; i<sizeof(ranges)/sizeof(ranges[0]); i++) {
952 /*ucmpe32_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value); */
953 utrie_setRange32(t->mapping, ranges[i].start, ranges[i].end, ranges[i].value, TRUE);
954 }
955
956
957 int32_t surrogateCount = 0;
958 while(!feof(data)) {
959 if(U_FAILURE(*status)) {
960 fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n",
961 *status, u_errorName(*status), (int)line, filename);
962 exit(*status);
963 }
964
965 element = readAnElement(data, t, &consts, status);
966 line++;
967 if(VERBOSE) {
968 fprintf(stdout, "%u ", (int)line);
969 }
970 if(element != NULL) {
971 // we have read the line, now do something sensible with the read data!
972
973 // Below stuff was taken care of in readAnElement
974 //if(element->variableTop == TRUE && variableTopValue == 0) {
975 // t->options->variableTopValue = element->cPoints[0];
976 //}
977
978 // if element is a contraction, we want to add it to contractions
979 if(element->cSize > 1 && element->cPoints[0] != 0xFDD0) { // this is a contraction
980 if(UTF_IS_LEAD(element->cPoints[0]) && UTF_IS_TRAIL(element->cPoints[1]) && element->cSize == 2) {
981 surrogateCount++;
982 } else {
983 if(noOfContractions>=MAX_UCA_CONTRACTION_CES) {
984 fprintf(stderr,
985 "\nMore than %d contractions. Please increase MAX_UCA_CONTRACTION_CES in genuca.cpp. "
986 "Exiting...\n",
987 (int)MAX_UCA_CONTRACTION_CES);
988 exit(*status);
989 }
990 contractionCEs[noOfContractions][0] = element->cPoints[0];
991 contractionCEs[noOfContractions][1] = element->cPoints[1];
992 if(element->cSize > 2) { // the third one
993 contractionCEs[noOfContractions][2] = element->cPoints[2];
994 } else {
995 contractionCEs[noOfContractions][2] = 0;
996 }
997 noOfContractions++;
998 }
999 }
1000 else {
1001 // TODO (claireho): does this work? Need more tests
1002 // The following code is to handle the UCA pre-context rules
1003 // for L/l with middle dot. We share the structures for contractionCombos.
1004 // The format for pre-context character is
1005 // contractionCEs[0]: codepoint in element->cPoints[0]
1006 // contractionCEs[1]: '\0' to differentiate with contractions.
1007 // contractionCEs[2]: prefix char
1008 if (element->prefixSize>0) {
1009 if(noOfContractions>=MAX_UCA_CONTRACTION_CES) {
1010 fprintf(stderr,
1011 "\nMore than %d contractions. Please increase MAX_UCA_CONTRACTION_CES in genuca.cpp. "
1012 "Exiting...\n",
1013 (int)MAX_UCA_CONTRACTION_CES);
1014 exit(*status);
1015 }
1016 contractionCEs[noOfContractions][0]=element->cPoints[0];
1017 contractionCEs[noOfContractions][1]='\0';
1018 contractionCEs[noOfContractions][2]=element->prefixChars[0];
1019 noOfContractions++;
1020 }
1021
1022 }
1023
1024 /* we're first adding to inverse, because addAnElement will reverse the order */
1025 /* of code points and stuff... we don't want that to happen */
1026 addToInverse(element, status);
1027 if(!(element->cSize > 1 && element->cPoints[0] == 0xFDD0)) {
1028 uprv_uca_addAnElement(t, element, status);
1029 }
1030 }
1031 }
1032
1033 if(UCAVersion[0] == 0 && UCAVersion[1] == 0 && UCAVersion[2] == 0 && UCAVersion[3] == 0) {
1034 fprintf(stderr, "UCA version not specified. Cannot create data file!\n");
1035 uprv_uca_closeTempTable(t);
1036 uprv_free(opts);
1037 uprv_free(myD);
1038 fclose(data);
1039 return -1;
1040 }
1041 /* {
1042 uint32_t trieWord = utrie_get32(t->mapping, 0xDC01, NULL);
1043 }*/
1044
1045 if (VERBOSE) {
1046 fprintf(stdout, "\nLines read: %u\n", (int)line);
1047 fprintf(stdout, "Surrogate count: %i\n", (int)surrogateCount);
1048 fprintf(stdout, "Raw data breakdown:\n");
1049 /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
1050 fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
1051 fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
1052 fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
1053 }
1054
1055
1056 /* produce canonical closure for table */
1057 /* first set up constants for implicit calculation */
1058 uprv_uca_initImplicitConstants(status);
1059 /* do the closure */
1060 int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, status);
1061 if(noOfClosures != 0) {
1062 fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
1063 }
1064
1065 /* test */
1066 UCATableHeader *myData = uprv_uca_assembleTable(t, status);
1067
1068 if (VERBOSE) {
1069 fprintf(stdout, "Compacted data breakdown:\n");
1070 /*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
1071 fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
1072 fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
1073 fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
1074 }
1075
1076 if(U_FAILURE(*status)) {
1077 fprintf(stderr, "Error creating table: %s\n", u_errorName(*status));
1078 uprv_uca_closeTempTable(t);
1079 uprv_free(opts);
1080 uprv_free(myD);
1081 fclose(data);
1082 return -1;
1083 }
1084
1085 /* populate the version info struct with version info*/
1086 myData->version[0] = UCOL_BUILDER_VERSION;
1087 myData->version[1] = UCAVersion[0];
1088 myData->version[2] = UCAVersion[1];
1089 myData->version[3] = UCAVersion[2];
1090 /*TODO:The fractional rules version should be taken from FractionalUCA.txt*/
1091 // Removed this macro. Instead, we use the fields below
1092 //myD->version[1] = UCOL_FRACTIONAL_UCA_VERSION;
1093 //myD->UCAVersion = UCAVersion; // out of FractionalUCA.txt
1094 uprv_memcpy(myData->UCAVersion, UCAVersion, sizeof(UVersionInfo));
1095 u_getUnicodeVersion(myData->UCDVersion);
1096
1097 writeOutData(myData, &consts, contractionCEs, noOfContractions, outputDir, copyright, status);
1098
1099 InverseUCATableHeader *inverse = assembleInverseTable(status);
1100 uprv_memcpy(inverse->UCAVersion, UCAVersion, sizeof(UVersionInfo));
1101 writeOutInverseData(inverse, outputDir, copyright, status);
1102
1103 uprv_uca_closeTempTable(t);
1104 uprv_free(myD);
1105 uprv_free(opts);
1106
1107
1108 uprv_free(myData);
1109 uprv_free(inverse);
1110 fclose(data);
1111
1112 return 0;
1113 }
1114
1115 #endif /* #if !UCONFIG_NO_COLLATION */
1116
1117 static UOption options[]={
1118 UOPTION_HELP_H, /* 0 Numbers for those who*/
1119 UOPTION_HELP_QUESTION_MARK, /* 1 can't count. */
1120 UOPTION_COPYRIGHT, /* 2 */
1121 UOPTION_VERSION, /* 3 */
1122 UOPTION_DESTDIR, /* 4 */
1123 UOPTION_SOURCEDIR, /* 5 */
1124 UOPTION_VERBOSE, /* 6 */
1125 UOPTION_ICUDATADIR /* 7 */
1126 /* weiv can't count :))))) */
1127 };
1128
main(int argc,char * argv[])1129 int main(int argc, char* argv[]) {
1130 UErrorCode status = U_ZERO_ERROR;
1131 const char* destdir = NULL;
1132 const char* srcDir = NULL;
1133 char filename[300];
1134 char *basename = NULL;
1135 const char *copyright = NULL;
1136 uprv_memset(&UCAVersion, 0, 4);
1137
1138 U_MAIN_INIT_ARGS(argc, argv);
1139
1140 /* preset then read command line options */
1141 options[4].value=u_getDataDirectory();
1142 options[5].value="";
1143 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1144
1145 /* error handling, printing usage message */
1146 if(argc<0) {
1147 fprintf(stderr,
1148 "error in command line argument \"%s\"\n",
1149 argv[-argc]);
1150 } else if(argc<2) {
1151 argc=-1;
1152 }
1153 if(options[0].doesOccur || options[1].doesOccur) {
1154 fprintf(stderr,
1155 "usage: %s [-options] file\n"
1156 "\tRead in UCA collation text data and write out the binary collation data\n"
1157 "options:\n"
1158 "\t-h or -? or --help this usage text\n"
1159 "\t-V or --version show a version message\n"
1160 "\t-c or --copyright include a copyright notice\n"
1161 "\t-d or --destdir destination directory, followed by the path\n"
1162 "\t-s or --sourcedir source directory, followed by the path\n"
1163 "\t-v or --verbose turn on verbose output\n"
1164 "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
1165 "\t followed by path, defaults to %s\n",
1166 argv[0], u_getDataDirectory());
1167 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1168 }
1169 if(options[3].doesOccur) {
1170 fprintf(stdout, "genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n",
1171 #if UCONFIG_NO_COLLATION
1172 0, 0
1173 #else
1174 UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1
1175 #endif
1176 );
1177 fprintf(stdout, U_COPYRIGHT_STRING"\n");
1178 exit(0);
1179 }
1180
1181 /* get the options values */
1182 destdir = options[4].value;
1183 srcDir = options[5].value;
1184 VERBOSE = options[6].doesOccur;
1185
1186 if (options[2].doesOccur) {
1187 copyright = U_COPYRIGHT_STRING;
1188 }
1189
1190 if (options[7].doesOccur) {
1191 u_setDataDirectory(options[7].value);
1192 }
1193 /* Initialize ICU */
1194 u_init(&status);
1195 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1196 fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
1197 argv[0], u_errorName(status));
1198 exit(1);
1199 }
1200 status = U_ZERO_ERROR;
1201
1202
1203 /* prepare the filename beginning with the source dir */
1204 uprv_strcpy(filename, srcDir);
1205 basename=filename+uprv_strlen(filename);
1206
1207 if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
1208 *basename++ = U_FILE_SEP_CHAR;
1209 }
1210
1211 if(argc < 0) {
1212 uprv_strcpy(basename, "FractionalUCA.txt");
1213 } else {
1214 argv++;
1215 uprv_strcpy(basename, getLongPathname(*argv));
1216 }
1217
1218 #if 0
1219 if(u_getCombiningClass(0x0053) == 0)
1220 {
1221 fprintf(stderr, "SEVERE ERROR: Normalization data is not functioning! Bailing out. Was not able to load unorm.dat.\n");
1222 exit(1);
1223 }
1224 #endif
1225
1226 #if UCONFIG_NO_COLLATION
1227
1228 UNewDataMemory *pData;
1229 const char *msg;
1230
1231 msg = "genuca writes dummy " UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
1232 fprintf(stderr, "%s\n", msg);
1233 pData = udata_create(destdir, UCA_DATA_TYPE, UCA_DATA_NAME, &dummyDataInfo,
1234 NULL, &status);
1235 udata_writeBlock(pData, msg, strlen(msg));
1236 udata_finish(pData, &status);
1237
1238 msg = "genuca writes dummy " INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
1239 fprintf(stderr, "%s\n", msg);
1240 pData = udata_create(destdir, INVC_DATA_TYPE, INVC_DATA_NAME, &dummyDataInfo,
1241 NULL, &status);
1242 udata_writeBlock(pData, msg, strlen(msg));
1243 udata_finish(pData, &status);
1244
1245 return (int)status;
1246
1247 #else
1248
1249 return write_uca_table(filename, destdir, copyright, &status);
1250
1251 #endif
1252 }
1253
1254 /*
1255 * Hey, Emacs, please set the following:
1256 *
1257 * Local Variables:
1258 * indent-tabs-mode: nil
1259 * End:
1260 *
1261 */
1262