1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 # define UNISTR_FROM_CHAR_EXPLICIT explicit
24 #endif
25
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 # define UNISTR_FROM_STRING_EXPLICIT explicit
29 #endif
30
31 #include "parse.h"
32 #include "errmsg.h"
33 #include "uhash.h"
34 #include "cmemory.h"
35 #include "cstring.h"
36 #include "uinvchar.h"
37 #include "read.h"
38 #include "ustr.h"
39 #include "reslist.h"
40 #include "rbt_pars.h"
41 #include "genrb.h"
42 #include "unicode/ustring.h"
43 #include "unicode/uscript.h"
44 #include "unicode/utf16.h"
45 #include "unicode/putil.h"
46 #include "collationbuilder.h"
47 #include "collationdata.h"
48 #include "collationdatareader.h"
49 #include "collationdatawriter.h"
50 #include "collationfastlatinbuilder.h"
51 #include "collationinfo.h"
52 #include "collationroot.h"
53 #include "collationruleparser.h"
54 #include "collationtailoring.h"
55 #include <stdio.h>
56
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code units that the readers in this file compare against. */
#define CR              0x000D  /* carriage return */
#define LF              0x000A  /* line feed */
#define SPACE           0x0020  /* ' ' */
#define TAB             0x0009  /* horizontal tab */
#define ESCAPE          0x005C  /* '\\' */
#define HASH            0x0023  /* '#' */
#define QUOTE           0x0027  /* '\'' */
#define ZERO            0x0030  /* '0' */
#define STARTCOMMAND    0x005B  /* '[' */
#define ENDCOMMAND      0x005D  /* ']' */
#define OPENSQBRACKET   0x005B  /* '[' (same value as STARTCOMMAND) */
#define CLOSESQBRACKET  0x005D  /* ']' (same value as ENDCOMMAND) */
72
73 using icu::LocalPointer;
74 using icu::UnicodeString;
75
76 struct Lookahead
77 {
78 enum ETokenType type;
79 struct UString value;
80 struct UString comment;
81 uint32_t line;
82 };
83
84 /* keep in sync with token defines in read.h */
85 const char *tokenNames[TOK_TOKEN_COUNT] =
86 {
87 "string", /* A string token, such as "MonthNames" */
88 "'{'", /* An opening brace character */
89 "'}'", /* A closing brace character */
90 "','", /* A comma */
91 "':'", /* A colon */
92
93 "<end of file>", /* End of the file has been reached successfully */
94 "<end of line>"
95 };
96
97 /* Just to store "TRUE" */
98 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
99
100 typedef struct {
101 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
102 uint32_t lookaheadPosition;
103 UCHARBUF *buffer;
104 struct SRBRoot *bundle;
105 const char *inputdir;
106 uint32_t inputdirLength;
107 const char *outputdir;
108 uint32_t outputdirLength;
109 const char *filename;
110 UBool makeBinaryCollation;
111 UBool omitCollationRules;
112 } ParseState;
113
114 typedef struct SResource *
115 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
116
117 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
118
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   struct UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
132 static void
initLookahead(ParseState * state,UCHARBUF * buf,UErrorCode * status)133 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
134 {
135 static uint32_t initTypeStrings = 0;
136 uint32_t i;
137
138 if (!initTypeStrings)
139 {
140 initTypeStrings = 1;
141 }
142
143 state->lookaheadPosition = 0;
144 state->buffer = buf;
145
146 resetLineNumber();
147
148 for (i = 0; i < MAX_LOOKAHEAD; i++)
149 {
150 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
151 if (U_FAILURE(*status))
152 {
153 return;
154 }
155 }
156
157 *status = U_ZERO_ERROR;
158 }
159
160 static void
cleanupLookahead(ParseState * state)161 cleanupLookahead(ParseState* state)
162 {
163 uint32_t i;
164 for (i = 0; i <= MAX_LOOKAHEAD; i++)
165 {
166 ustr_deinit(&state->lookahead[i].value);
167 ustr_deinit(&state->lookahead[i].comment);
168 }
169
170 }
171
172 static enum ETokenType
getToken(ParseState * state,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)173 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
174 {
175 enum ETokenType result;
176 uint32_t i;
177
178 result = state->lookahead[state->lookaheadPosition].type;
179
180 if (tokenValue != NULL)
181 {
182 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
183 }
184
185 if (linenumber != NULL)
186 {
187 *linenumber = state->lookahead[state->lookaheadPosition].line;
188 }
189
190 if (comment != NULL)
191 {
192 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
193 }
194
195 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
196 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
197 ustr_setlen(&state->lookahead[i].comment, 0, status);
198 ustr_setlen(&state->lookahead[i].value, 0, status);
199 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
200
201 /* printf("getToken, returning %s\n", tokenNames[result]); */
202
203 return result;
204 }
205
206 static enum ETokenType
peekToken(ParseState * state,uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)207 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
208 {
209 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
210
211 if (U_FAILURE(*status))
212 {
213 return TOK_ERROR;
214 }
215
216 if (lookaheadCount >= MAX_LOOKAHEAD)
217 {
218 *status = U_INTERNAL_PROGRAM_ERROR;
219 return TOK_ERROR;
220 }
221
222 if (tokenValue != NULL)
223 {
224 *tokenValue = &state->lookahead[i].value;
225 }
226
227 if (linenumber != NULL)
228 {
229 *linenumber = state->lookahead[i].line;
230 }
231
232 if(comment != NULL){
233 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
234 }
235
236 return state->lookahead[i].type;
237 }
238
239 static void
expect(ParseState * state,enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)240 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
241 {
242 uint32_t line;
243
244 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
245
246 if (linenumber != NULL)
247 {
248 *linenumber = line;
249 }
250
251 if (U_FAILURE(*status))
252 {
253 return;
254 }
255
256 if (token != expectedToken)
257 {
258 *status = U_INVALID_FORMAT_ERROR;
259 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
260 }
261 else
262 {
263 *status = U_ZERO_ERROR;
264 }
265 }
266
getInvariantString(ParseState * state,uint32_t * line,struct UString * comment,UErrorCode * status)267 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
268 {
269 struct UString *tokenValue;
270 char *result;
271 uint32_t count;
272
273 expect(state, TOK_STRING, &tokenValue, comment, line, status);
274
275 if (U_FAILURE(*status))
276 {
277 return NULL;
278 }
279
280 count = u_strlen(tokenValue->fChars);
281 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
282 *status = U_INVALID_FORMAT_ERROR;
283 error(*line, "invariant characters required for table keys, binary data, etc.");
284 return NULL;
285 }
286
287 result = static_cast<char *>(uprv_malloc(count+1));
288
289 if (result == NULL)
290 {
291 *status = U_MEMORY_ALLOCATION_ERROR;
292 return NULL;
293 }
294
295 u_UCharsToChars(tokenValue->fChars, result, count+1);
296 return result;
297 }
298
299 static struct SResource *
parseUCARules(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)300 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
301 {
302 struct SResource *result = NULL;
303 struct UString *tokenValue;
304 FileStream *file = NULL;
305 char filename[256] = { '\0' };
306 char cs[128] = { '\0' };
307 uint32_t line;
308 UBool quoted = FALSE;
309 UCHARBUF *ucbuf=NULL;
310 UChar32 c = 0;
311 const char* cp = NULL;
312 UChar *pTarget = NULL;
313 UChar *target = NULL;
314 UChar *targetLimit = NULL;
315 int32_t size = 0;
316
317 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
318
319 if(isVerbose()){
320 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
321 }
322
323 if (U_FAILURE(*status))
324 {
325 return NULL;
326 }
327 /* make the filename including the directory */
328 if (state->inputdir != NULL)
329 {
330 uprv_strcat(filename, state->inputdir);
331
332 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
333 {
334 uprv_strcat(filename, U_FILE_SEP_STRING);
335 }
336 }
337
338 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
339
340 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
341
342 if (U_FAILURE(*status))
343 {
344 return NULL;
345 }
346 uprv_strcat(filename, cs);
347
348 if(state->omitCollationRules) {
349 return res_none();
350 }
351
352 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
353
354 if (U_FAILURE(*status)) {
355 error(line, "An error occured while opening the input file %s\n", filename);
356 return NULL;
357 }
358
359 /* We allocate more space than actually required
360 * since the actual size needed for storing UChars
361 * is not known in UTF-8 byte stream
362 */
363 size = ucbuf_size(ucbuf) + 1;
364 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
365 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
366 target = pTarget;
367 targetLimit = pTarget+size;
368
369 /* read the rules into the buffer */
370 while (target < targetLimit)
371 {
372 c = ucbuf_getc(ucbuf, status);
373 if(c == QUOTE) {
374 quoted = (UBool)!quoted;
375 }
376 /* weiv (06/26/2002): adding the following:
377 * - preserving spaces in commands [...]
378 * - # comments until the end of line
379 */
380 if (c == STARTCOMMAND && !quoted)
381 {
382 /* preserve commands
383 * closing bracket will be handled by the
384 * append at the end of the loop
385 */
386 while(c != ENDCOMMAND) {
387 U_APPEND_CHAR32_ONLY(c, target);
388 c = ucbuf_getc(ucbuf, status);
389 }
390 }
391 else if (c == HASH && !quoted) {
392 /* skip comments */
393 while(c != CR && c != LF) {
394 c = ucbuf_getc(ucbuf, status);
395 }
396 continue;
397 }
398 else if (c == ESCAPE)
399 {
400 c = unescape(ucbuf, status);
401
402 if (c == (UChar32)U_ERR)
403 {
404 uprv_free(pTarget);
405 T_FileStream_close(file);
406 return NULL;
407 }
408 }
409 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
410 {
411 /* ignore spaces carriage returns
412 * and line feed unless in the form \uXXXX
413 */
414 continue;
415 }
416
417 /* Append UChar * after dissembling if c > 0xffff*/
418 if (c != (UChar32)U_EOF)
419 {
420 U_APPEND_CHAR32_ONLY(c, target);
421 }
422 else
423 {
424 break;
425 }
426 }
427
428 /* terminate the string */
429 if(target < targetLimit){
430 *target = 0x0000;
431 }
432
433 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
434
435
436 ucbuf_close(ucbuf);
437 uprv_free(pTarget);
438 T_FileStream_close(file);
439
440 return result;
441 }
442
443 static struct SResource *
parseTransliterator(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)444 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
445 {
446 struct SResource *result = NULL;
447 struct UString *tokenValue;
448 FileStream *file = NULL;
449 char filename[256] = { '\0' };
450 char cs[128] = { '\0' };
451 uint32_t line;
452 UCHARBUF *ucbuf=NULL;
453 const char* cp = NULL;
454 UChar *pTarget = NULL;
455 const UChar *pSource = NULL;
456 int32_t size = 0;
457
458 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
459
460 if(isVerbose()){
461 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
462 }
463
464 if (U_FAILURE(*status))
465 {
466 return NULL;
467 }
468 /* make the filename including the directory */
469 if (state->inputdir != NULL)
470 {
471 uprv_strcat(filename, state->inputdir);
472
473 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
474 {
475 uprv_strcat(filename, U_FILE_SEP_STRING);
476 }
477 }
478
479 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
480
481 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
482
483 if (U_FAILURE(*status))
484 {
485 return NULL;
486 }
487 uprv_strcat(filename, cs);
488
489
490 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
491
492 if (U_FAILURE(*status)) {
493 error(line, "An error occured while opening the input file %s\n", filename);
494 return NULL;
495 }
496
497 /* We allocate more space than actually required
498 * since the actual size needed for storing UChars
499 * is not known in UTF-8 byte stream
500 */
501 pSource = ucbuf_getBuffer(ucbuf, &size, status);
502 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
503 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
504
505 #if !UCONFIG_NO_TRANSLITERATION
506 size = utrans_stripRules(pSource, size, pTarget, status);
507 #else
508 size = 0;
509 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
510 #endif
511 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
512
513 ucbuf_close(ucbuf);
514 uprv_free(pTarget);
515 T_FileStream_close(file);
516
517 return result;
518 }
519 static struct SResource* dependencyArray = NULL;
520
521 static struct SResource *
parseDependency(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)522 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
523 {
524 struct SResource *result = NULL;
525 struct SResource *elem = NULL;
526 struct UString *tokenValue;
527 uint32_t line;
528 char filename[256] = { '\0' };
529 char cs[128] = { '\0' };
530
531 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
532
533 if(isVerbose()){
534 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
535 }
536
537 if (U_FAILURE(*status))
538 {
539 return NULL;
540 }
541 /* make the filename including the directory */
542 if (state->outputdir != NULL)
543 {
544 uprv_strcat(filename, state->outputdir);
545
546 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
547 {
548 uprv_strcat(filename, U_FILE_SEP_STRING);
549 }
550 }
551
552 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
553
554 if (U_FAILURE(*status))
555 {
556 return NULL;
557 }
558 uprv_strcat(filename, cs);
559 if(!T_FileStream_file_exists(filename)){
560 if(isStrict()){
561 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
562 }else{
563 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
564 }
565 }
566 if(dependencyArray==NULL){
567 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
568 }
569 if(tag!=NULL){
570 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
571 }
572 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
573
574 array_add(dependencyArray, elem, status);
575
576 if (U_FAILURE(*status))
577 {
578 return NULL;
579 }
580 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
581 return result;
582 }
583 static struct SResource *
parseString(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)584 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
585 {
586 struct UString *tokenValue;
587 struct SResource *result = NULL;
588
589 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
590 {
591 return parseUCARules(tag, startline, status);
592 }*/
593 if(isVerbose()){
594 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
595 }
596 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
597
598 if (U_SUCCESS(*status))
599 {
600 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601 doesn't survive expect either) */
602
603 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
604 if(U_SUCCESS(*status) && result) {
605 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
606
607 if (U_FAILURE(*status))
608 {
609 res_close(result);
610 return NULL;
611 }
612 }
613 }
614
615 return result;
616 }
617
618 static struct SResource *
parseAlias(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)619 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
620 {
621 struct UString *tokenValue;
622 struct SResource *result = NULL;
623
624 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
625
626 if(isVerbose()){
627 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
628 }
629
630 if (U_SUCCESS(*status))
631 {
632 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633 doesn't survive expect either) */
634
635 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
636
637 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
638
639 if (U_FAILURE(*status))
640 {
641 res_close(result);
642 return NULL;
643 }
644 }
645
646 return result;
647 }
648
649 #if !UCONFIG_NO_COLLATION
650
651 namespace {
652
resLookup(struct SResource * res,const char * key)653 static struct SResource* resLookup(struct SResource* res, const char* key){
654 struct SResource *current = NULL;
655 struct SResTable *list;
656 if (res == res_none()) {
657 return NULL;
658 }
659
660 list = &(res->u.fTable);
661
662 current = list->fFirst;
663 while (current != NULL) {
664 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
665 return current;
666 }
667 current = current->fNext;
668 }
669 return NULL;
670 }
671
672 class GenrbImporter : public icu::CollationRuleParser::Importer {
673 public:
GenrbImporter(const char * in,const char * out)674 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
675 virtual ~GenrbImporter();
676 virtual void getRules(
677 const char *localeID, const char *collationType,
678 UnicodeString &rules,
679 const char *&errorReason, UErrorCode &errorCode);
680
681 private:
682 const char *inputDir;
683 const char *outputDir;
684 };
685
~GenrbImporter()686 GenrbImporter::~GenrbImporter() {}
687
688 void
getRules(const char * localeID,const char * collationType,UnicodeString & rules,const char * &,UErrorCode & errorCode)689 GenrbImporter::getRules(
690 const char *localeID, const char *collationType,
691 UnicodeString &rules,
692 const char *& /*errorReason*/, UErrorCode &errorCode) {
693 struct SRBRoot *data = NULL;
694 UCHARBUF *ucbuf = NULL;
695 int localeLength = strlen(localeID);
696 char* filename = (char*)uprv_malloc(localeLength+5);
697 char *inputDirBuf = NULL;
698 char *openFileName = NULL;
699 const char* cp = "";
700 int32_t i = 0;
701 int32_t dirlen = 0;
702 int32_t filelen = 0;
703 struct SResource* root;
704 struct SResource* collations;
705 struct SResource* collation;
706 struct SResource* sequence;
707
708 memcpy(filename, localeID, localeLength);
709 for(i = 0; i < localeLength; i++){
710 if(filename[i] == '-'){
711 filename[i] = '_';
712 }
713 }
714 filename[localeLength] = '.';
715 filename[localeLength+1] = 't';
716 filename[localeLength+2] = 'x';
717 filename[localeLength+3] = 't';
718 filename[localeLength+4] = 0;
719
720
721 if (U_FAILURE(errorCode)) {
722 return;
723 }
724 if(filename==NULL){
725 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
726 return;
727 }else{
728 filelen = (int32_t)uprv_strlen(filename);
729 }
730 if(inputDir == NULL) {
731 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
732 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
733 openFileName[0] = '\0';
734 if (filenameBegin != NULL) {
735 /*
736 * When a filename ../../../data/root.txt is specified,
737 * we presume that the input directory is ../../../data
738 * This is very important when the resource file includes
739 * another file, like UCARules.txt or thaidict.brk.
740 */
741 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
742 inputDirBuf = (char *)uprv_malloc(filenameSize);
743
744 /* test for NULL */
745 if(inputDirBuf == NULL) {
746 errorCode = U_MEMORY_ALLOCATION_ERROR;
747 goto finish;
748 }
749
750 uprv_strncpy(inputDirBuf, filename, filenameSize);
751 inputDirBuf[filenameSize - 1] = 0;
752 inputDir = inputDirBuf;
753 dirlen = (int32_t)uprv_strlen(inputDir);
754 }
755 }else{
756 dirlen = (int32_t)uprv_strlen(inputDir);
757
758 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
759 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
760
761 /* test for NULL */
762 if(openFileName == NULL) {
763 errorCode = U_MEMORY_ALLOCATION_ERROR;
764 goto finish;
765 }
766
767 openFileName[0] = '\0';
768 /*
769 * append the input dir to openFileName if the first char in
770 * filename is not file seperation char and the last char input directory is not '.'.
771 * This is to support :
772 * genrb -s. /home/icu/data
773 * genrb -s. icu/data
774 * The user cannot mix notations like
775 * genrb -s. /icu/data --- the absolute path specified. -s redundant
776 * user should use
777 * genrb -s. icu/data --- start from CWD and look in icu/data dir
778 */
779 if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
780 uprv_strcpy(openFileName, inputDir);
781 openFileName[dirlen] = U_FILE_SEP_CHAR;
782 }
783 openFileName[dirlen + 1] = '\0';
784 } else {
785 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
786
787 /* test for NULL */
788 if(openFileName == NULL) {
789 errorCode = U_MEMORY_ALLOCATION_ERROR;
790 goto finish;
791 }
792
793 uprv_strcpy(openFileName, inputDir);
794
795 }
796 }
797 uprv_strcat(openFileName, filename);
798 /* printf("%s\n", openFileName); */
799 errorCode = U_ZERO_ERROR;
800 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
801
802 if(errorCode == U_FILE_ACCESS_ERROR) {
803
804 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
805 goto finish;
806 }
807 if (ucbuf == NULL || U_FAILURE(errorCode)) {
808 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(errorCode));
809 goto finish;
810 }
811
812 /* Parse the data into an SRBRoot */
813 data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
814 if (U_FAILURE(errorCode)) {
815 goto finish;
816 }
817
818 root = data->fRoot;
819 collations = resLookup(root, "collations");
820 if (collations != NULL) {
821 collation = resLookup(collations, collationType);
822 if (collation != NULL) {
823 sequence = resLookup(collation, "Sequence");
824 if (sequence != NULL) {
825 // No string pointer aliasing so that we need not hold onto the resource bundle.
826 rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength);
827 }
828 }
829 }
830
831 finish:
832 if (inputDirBuf != NULL) {
833 uprv_free(inputDirBuf);
834 }
835
836 if (openFileName != NULL) {
837 uprv_free(openFileName);
838 }
839
840 if(ucbuf) {
841 ucbuf_close(ucbuf);
842 }
843 }
844
845 // Quick-and-dirty escaping function.
846 // Assumes that we are on an ASCII-based platform.
847 static void
escape(const UChar * s,char * buffer)848 escape(const UChar *s, char *buffer) {
849 int32_t length = u_strlen(s);
850 int32_t i = 0;
851 for (;;) {
852 UChar32 c;
853 U16_NEXT(s, i, length, c);
854 if (c == 0) {
855 *buffer = 0;
856 return;
857 } else if (0x20 <= c && c <= 0x7e) {
858 // printable ASCII
859 *buffer++ = (char)c; // assumes ASCII-based platform
860 } else {
861 buffer += sprintf(buffer, "\\u%04X", (int)c);
862 }
863 }
864 }
865
866 } // namespace
867
868 #endif // !UCONFIG_NO_COLLATION
869
870 static struct SResource *
addCollation(ParseState * state,struct SResource * result,const char * collationType,uint32_t startline,UErrorCode * status)871 addCollation(ParseState* state, struct SResource *result, const char *collationType,
872 uint32_t startline, UErrorCode *status)
873 {
874 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
875 struct SResource *member = NULL;
876 struct UString *tokenValue;
877 struct UString comment;
878 enum ETokenType token;
879 char subtag[1024];
880 UnicodeString rules;
881 UBool haveRules = FALSE;
882 UVersionInfo version;
883 uint32_t line;
884
885 /* '{' . (name resource)* '}' */
886 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
887
888 for (;;)
889 {
890 ustr_init(&comment);
891 token = getToken(state, &tokenValue, &comment, &line, status);
892
893 if (token == TOK_CLOSE_BRACE)
894 {
895 break;
896 }
897
898 if (token != TOK_STRING)
899 {
900 res_close(result);
901 *status = U_INVALID_FORMAT_ERROR;
902
903 if (token == TOK_EOF)
904 {
905 error(startline, "unterminated table");
906 }
907 else
908 {
909 error(line, "Unexpected token %s", tokenNames[token]);
910 }
911
912 return NULL;
913 }
914
915 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
916
917 if (U_FAILURE(*status))
918 {
919 res_close(result);
920 return NULL;
921 }
922
923 member = parseResource(state, subtag, NULL, status);
924
925 if (U_FAILURE(*status))
926 {
927 res_close(result);
928 return NULL;
929 }
930 if (result == NULL)
931 {
932 // Ignore the parsed resources, continue parsing.
933 }
934 else if (uprv_strcmp(subtag, "Version") == 0)
935 {
936 char ver[40];
937 int32_t length = member->u.fString.fLength;
938
939 if (length >= (int32_t) sizeof(ver))
940 {
941 length = (int32_t) sizeof(ver) - 1;
942 }
943
944 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
945 u_versionFromString(version, ver);
946
947 table_add(result, member, line, status);
948 member = NULL;
949 }
950 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
951 {
952 /* discard duplicate %%CollationBin if any*/
953 }
954 else if (uprv_strcmp(subtag, "Sequence") == 0)
955 {
956 rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
957 haveRules = TRUE;
958 // Defer building the collator until we have seen
959 // all sub-elements of the collation table, including the Version.
960 /* in order to achieve smaller data files, we can direct genrb */
961 /* to omit collation rules */
962 if(!state->omitCollationRules) {
963 table_add(result, member, line, status);
964 member = NULL;
965 }
966 }
967 else // Just copy non-special items.
968 {
969 table_add(result, member, line, status);
970 member = NULL;
971 }
972 res_close(member); // TODO: use LocalPointer
973 if (U_FAILURE(*status))
974 {
975 res_close(result);
976 return NULL;
977 }
978 }
979
980 if (!haveRules) { return result; }
981
982 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
983 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
984 (void)collationType;
985 #else
986 // CLDR ticket #3949, ICU ticket #8082:
987 // Do not build collation binary data for for-import-only "private" collation rule strings.
988 if (uprv_strncmp(collationType, "private-", 8) == 0) {
989 if(isVerbose()) {
990 printf("Not building %s~%s collation binary\n", state->filename, collationType);
991 }
992 return result;
993 }
994
995 if(!state->makeBinaryCollation) {
996 if(isVerbose()) {
997 printf("Not building %s~%s collation binary\n", state->filename, collationType);
998 }
999 return result;
1000 }
1001 UErrorCode intStatus = U_ZERO_ERROR;
1002 UParseError parseError;
1003 uprv_memset(&parseError, 0, sizeof(parseError));
1004 GenrbImporter importer(state->inputdir, state->outputdir);
1005 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
1006 if(U_FAILURE(intStatus)) {
1007 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
1008 res_close(result);
1009 return NULL; // TODO: use LocalUResourceBundlePointer for result
1010 }
1011 icu::CollationBuilder builder(base, intStatus);
1012 if(uprv_strncmp(collationType, "search", 6) == 0) {
1013 builder.disableFastLatin(); // build fast-Latin table unless search collator
1014 }
1015 LocalPointer<icu::CollationTailoring> t(
1016 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
1017 if(U_FAILURE(intStatus)) {
1018 const char *reason = builder.getErrorReason();
1019 if(reason == NULL) { reason = ""; }
1020 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
1021 state->filename, collationType,
1022 (long)parseError.offset, u_errorName(intStatus), reason);
1023 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1024 // Print pre- and post-context.
1025 char preBuffer[100], postBuffer[100];
1026 escape(parseError.preContext, preBuffer);
1027 escape(parseError.postContext, postBuffer);
1028 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
1029 }
1030 if(isStrict()) {
1031 *status = intStatus;
1032 res_close(result);
1033 return NULL;
1034 }
1035 }
1036 icu::LocalMemory<uint8_t> buffer;
1037 int32_t capacity = 100000;
1038 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
1039 if(dest == NULL) {
1040 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1041 (long)capacity);
1042 *status = U_MEMORY_ALLOCATION_ERROR;
1043 res_close(result);
1044 return NULL;
1045 }
1046 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
1047 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
1048 *t, *t->settings, indexes, dest, capacity, intStatus);
1049 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
1050 intStatus = U_ZERO_ERROR;
1051 capacity = totalSize;
1052 dest = buffer.allocateInsteadAndCopy(capacity);
1053 if(dest == NULL) {
1054 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1055 (long)capacity);
1056 *status = U_MEMORY_ALLOCATION_ERROR;
1057 res_close(result);
1058 return NULL;
1059 }
1060 totalSize = icu::CollationDataWriter::writeTailoring(
1061 *t, *t->settings, indexes, dest, capacity, intStatus);
1062 }
1063 if(U_FAILURE(intStatus)) {
1064 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1065 u_errorName(intStatus));
1066 res_close(result);
1067 return NULL;
1068 }
1069 if(isVerbose()) {
1070 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1071 icu::CollationInfo::printSizes(totalSize, indexes);
1072 if(t->settings->hasReordering()) {
1073 printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1074 icu::CollationInfo::printReorderRanges(
1075 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1076 }
1077 }
1078 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1079 table_add(result, collationBin, line, status);
1080 if (U_FAILURE(*status)) {
1081 res_close(result);
1082 return NULL;
1083 }
1084 #endif
1085 return result;
1086 }
1087
1088 static UBool
keepCollationType(const char * type)1089 keepCollationType(const char *type) { // android-changed
1090 // BEGIN android-added
1091 if (uprv_strcmp(type, "big5han") == 0) { return FALSE; }
1092 if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; }
1093 // END android-added
1094 return TRUE;
1095 }
1096
1097 static struct SResource *
parseCollationElements(ParseState * state,char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)1098 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1099 {
1100 struct SResource *result = NULL;
1101 struct SResource *member = NULL;
1102 struct SResource *collationRes = NULL;
1103 struct UString *tokenValue;
1104 struct UString comment;
1105 enum ETokenType token;
1106 char subtag[1024], typeKeyword[1024];
1107 uint32_t line;
1108
1109 result = table_open(state->bundle, tag, NULL, status);
1110
1111 if (result == NULL || U_FAILURE(*status))
1112 {
1113 return NULL;
1114 }
1115 if(isVerbose()){
1116 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1117 }
1118 if(!newCollation) {
1119 return addCollation(state, result, "(no type)", startline, status);
1120 }
1121 else {
1122 for(;;) {
1123 ustr_init(&comment);
1124 token = getToken(state, &tokenValue, &comment, &line, status);
1125
1126 if (token == TOK_CLOSE_BRACE)
1127 {
1128 return result;
1129 }
1130
1131 if (token != TOK_STRING)
1132 {
1133 res_close(result);
1134 *status = U_INVALID_FORMAT_ERROR;
1135
1136 if (token == TOK_EOF)
1137 {
1138 error(startline, "unterminated table");
1139 }
1140 else
1141 {
1142 error(line, "Unexpected token %s", tokenNames[token]);
1143 }
1144
1145 return NULL;
1146 }
1147
1148 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1149
1150 if (U_FAILURE(*status))
1151 {
1152 res_close(result);
1153 return NULL;
1154 }
1155
1156 if (uprv_strcmp(subtag, "default") == 0)
1157 {
1158 member = parseResource(state, subtag, NULL, status);
1159
1160 if (U_FAILURE(*status))
1161 {
1162 res_close(result);
1163 return NULL;
1164 }
1165
1166 table_add(result, member, line, status);
1167 }
1168 else
1169 {
1170 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1171 /* this probably needs to be refactored or recursively use the parser */
1172 /* first we assume that our collation table won't have the explicit type */
1173 /* then, we cannot handle aliases */
1174 if(token == TOK_OPEN_BRACE) {
1175 token = getToken(state, &tokenValue, &comment, &line, status);
1176 if (keepCollationType(subtag)) {
1177 collationRes = table_open(state->bundle, subtag, NULL, status);
1178 } else {
1179 collationRes = NULL;
1180 }
1181 // need to parse the collation data regardless
1182 collationRes = addCollation(state, collationRes, subtag, startline, status);
1183 if (collationRes != NULL) {
1184 table_add(result, collationRes, startline, status);
1185 }
1186 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1187 /* we could have a table too */
1188 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1189 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1190 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1191 member = parseResource(state, subtag, NULL, status);
1192 if (U_FAILURE(*status))
1193 {
1194 res_close(result);
1195 return NULL;
1196 }
1197
1198 table_add(result, member, line, status);
1199 } else {
1200 res_close(result);
1201 *status = U_INVALID_FORMAT_ERROR;
1202 return NULL;
1203 }
1204 } else {
1205 res_close(result);
1206 *status = U_INVALID_FORMAT_ERROR;
1207 return NULL;
1208 }
1209 }
1210
1211 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1212
1213 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1214
1215 if (U_FAILURE(*status))
1216 {
1217 res_close(result);
1218 return NULL;
1219 }
1220 }
1221 }
1222 }
1223
1224 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1225 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1226 static struct SResource *
realParseTable(ParseState * state,struct SResource * table,char * tag,uint32_t startline,UErrorCode * status)1227 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1228 {
1229 struct SResource *member = NULL;
1230 struct UString *tokenValue=NULL;
1231 struct UString comment;
1232 enum ETokenType token;
1233 char subtag[1024];
1234 uint32_t line;
1235 UBool readToken = FALSE;
1236
1237 /* '{' . (name resource)* '}' */
1238
1239 if(isVerbose()){
1240 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1241 }
1242 for (;;)
1243 {
1244 ustr_init(&comment);
1245 token = getToken(state, &tokenValue, &comment, &line, status);
1246
1247 if (token == TOK_CLOSE_BRACE)
1248 {
1249 if (!readToken) {
1250 warning(startline, "Encountered empty table");
1251 }
1252 return table;
1253 }
1254
1255 if (token != TOK_STRING)
1256 {
1257 *status = U_INVALID_FORMAT_ERROR;
1258
1259 if (token == TOK_EOF)
1260 {
1261 error(startline, "unterminated table");
1262 }
1263 else
1264 {
1265 error(line, "unexpected token %s", tokenNames[token]);
1266 }
1267
1268 return NULL;
1269 }
1270
1271 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1272 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1273 } else {
1274 *status = U_INVALID_FORMAT_ERROR;
1275 error(line, "invariant characters required for table keys");
1276 return NULL;
1277 }
1278
1279 if (U_FAILURE(*status))
1280 {
1281 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1282 return NULL;
1283 }
1284
1285 member = parseResource(state, subtag, &comment, status);
1286
1287 if (member == NULL || U_FAILURE(*status))
1288 {
1289 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1290 return NULL;
1291 }
1292
1293 table_add(table, member, line, status);
1294
1295 if (U_FAILURE(*status))
1296 {
1297 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1298 return NULL;
1299 }
1300 readToken = TRUE;
1301 ustr_deinit(&comment);
1302 }
1303
1304 /* not reached */
1305 /* A compiler warning will appear if all paths don't contain a return statement. */
1306 /* *status = U_INTERNAL_PROGRAM_ERROR;
1307 return NULL;*/
1308 }
1309
1310 static struct SResource *
parseTable(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1311 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1312 {
1313 struct SResource *result;
1314
1315 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1316 {
1317 return parseCollationElements(state, tag, startline, FALSE, status);
1318 }
1319 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1320 {
1321 return parseCollationElements(state, tag, startline, TRUE, status);
1322 }
1323 if(isVerbose()){
1324 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1325 }
1326
1327 result = table_open(state->bundle, tag, comment, status);
1328
1329 if (result == NULL || U_FAILURE(*status))
1330 {
1331 return NULL;
1332 }
1333 return realParseTable(state, result, tag, startline, status);
1334 }
1335
1336 static struct SResource *
parseArray(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1337 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1338 {
1339 struct SResource *result = NULL;
1340 struct SResource *member = NULL;
1341 struct UString *tokenValue;
1342 struct UString memberComments;
1343 enum ETokenType token;
1344 UBool readToken = FALSE;
1345
1346 result = array_open(state->bundle, tag, comment, status);
1347
1348 if (result == NULL || U_FAILURE(*status))
1349 {
1350 return NULL;
1351 }
1352 if(isVerbose()){
1353 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1354 }
1355
1356 ustr_init(&memberComments);
1357
1358 /* '{' . resource [','] '}' */
1359 for (;;)
1360 {
1361 /* reset length */
1362 ustr_setlen(&memberComments, 0, status);
1363
1364 /* check for end of array, but don't consume next token unless it really is the end */
1365 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1366
1367
1368 if (token == TOK_CLOSE_BRACE)
1369 {
1370 getToken(state, NULL, NULL, NULL, status);
1371 if (!readToken) {
1372 warning(startline, "Encountered empty array");
1373 }
1374 break;
1375 }
1376
1377 if (token == TOK_EOF)
1378 {
1379 res_close(result);
1380 *status = U_INVALID_FORMAT_ERROR;
1381 error(startline, "unterminated array");
1382 return NULL;
1383 }
1384
1385 /* string arrays are a special case */
1386 if (token == TOK_STRING)
1387 {
1388 getToken(state, &tokenValue, &memberComments, NULL, status);
1389 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1390 }
1391 else
1392 {
1393 member = parseResource(state, NULL, &memberComments, status);
1394 }
1395
1396 if (member == NULL || U_FAILURE(*status))
1397 {
1398 res_close(result);
1399 return NULL;
1400 }
1401
1402 array_add(result, member, status);
1403
1404 if (U_FAILURE(*status))
1405 {
1406 res_close(result);
1407 return NULL;
1408 }
1409
1410 /* eat optional comma if present */
1411 token = peekToken(state, 0, NULL, NULL, NULL, status);
1412
1413 if (token == TOK_COMMA)
1414 {
1415 getToken(state, NULL, NULL, NULL, status);
1416 }
1417
1418 if (U_FAILURE(*status))
1419 {
1420 res_close(result);
1421 return NULL;
1422 }
1423 readToken = TRUE;
1424 }
1425
1426 ustr_deinit(&memberComments);
1427 return result;
1428 }
1429
1430 static struct SResource *
parseIntVector(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1431 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1432 {
1433 struct SResource *result = NULL;
1434 enum ETokenType token;
1435 char *string;
1436 int32_t value;
1437 UBool readToken = FALSE;
1438 char *stopstring;
1439 uint32_t len;
1440 struct UString memberComments;
1441
1442 result = intvector_open(state->bundle, tag, comment, status);
1443
1444 if (result == NULL || U_FAILURE(*status))
1445 {
1446 return NULL;
1447 }
1448
1449 if(isVerbose()){
1450 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1451 }
1452 ustr_init(&memberComments);
1453 /* '{' . string [','] '}' */
1454 for (;;)
1455 {
1456 ustr_setlen(&memberComments, 0, status);
1457
1458 /* check for end of array, but don't consume next token unless it really is the end */
1459 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1460
1461 if (token == TOK_CLOSE_BRACE)
1462 {
1463 /* it's the end, consume the close brace */
1464 getToken(state, NULL, NULL, NULL, status);
1465 if (!readToken) {
1466 warning(startline, "Encountered empty int vector");
1467 }
1468 ustr_deinit(&memberComments);
1469 return result;
1470 }
1471
1472 string = getInvariantString(state, NULL, NULL, status);
1473
1474 if (U_FAILURE(*status))
1475 {
1476 res_close(result);
1477 return NULL;
1478 }
1479
1480 /* For handling illegal char in the Intvector */
1481 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1482 len=(uint32_t)(stopstring-string);
1483
1484 if(len==uprv_strlen(string))
1485 {
1486 intvector_add(result, value, status);
1487 uprv_free(string);
1488 token = peekToken(state, 0, NULL, NULL, NULL, status);
1489 }
1490 else
1491 {
1492 uprv_free(string);
1493 *status=U_INVALID_CHAR_FOUND;
1494 }
1495
1496 if (U_FAILURE(*status))
1497 {
1498 res_close(result);
1499 return NULL;
1500 }
1501
1502 /* the comma is optional (even though it is required to prevent the reader from concatenating
1503 consecutive entries) so that a missing comma on the last entry isn't an error */
1504 if (token == TOK_COMMA)
1505 {
1506 getToken(state, NULL, NULL, NULL, status);
1507 }
1508 readToken = TRUE;
1509 }
1510
1511 /* not reached */
1512 /* A compiler warning will appear if all paths don't contain a return statement. */
1513 /* intvector_close(result, status);
1514 *status = U_INTERNAL_PROGRAM_ERROR;
1515 return NULL;*/
1516 }
1517
1518 static struct SResource *
parseBinary(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1519 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1520 {
1521 struct SResource *result = NULL;
1522 uint8_t *value;
1523 char *string;
1524 char toConv[3] = {'\0', '\0', '\0'};
1525 uint32_t count;
1526 uint32_t i;
1527 uint32_t line;
1528 char *stopstring;
1529 uint32_t len;
1530
1531 string = getInvariantString(state, &line, NULL, status);
1532
1533 if (string == NULL || U_FAILURE(*status))
1534 {
1535 return NULL;
1536 }
1537
1538 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1539
1540 if (U_FAILURE(*status))
1541 {
1542 uprv_free(string);
1543 return NULL;
1544 }
1545
1546 if(isVerbose()){
1547 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1548 }
1549
1550 count = (uint32_t)uprv_strlen(string);
1551 if (count > 0){
1552 if((count % 2)==0){
1553 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1554
1555 if (value == NULL)
1556 {
1557 uprv_free(string);
1558 *status = U_MEMORY_ALLOCATION_ERROR;
1559 return NULL;
1560 }
1561
1562 for (i = 0; i < count; i += 2)
1563 {
1564 toConv[0] = string[i];
1565 toConv[1] = string[i + 1];
1566
1567 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1568 len=(uint32_t)(stopstring-toConv);
1569
1570 if(len!=uprv_strlen(toConv))
1571 {
1572 uprv_free(string);
1573 *status=U_INVALID_CHAR_FOUND;
1574 return NULL;
1575 }
1576 }
1577
1578 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1579
1580 uprv_free(value);
1581 }
1582 else
1583 {
1584 *status = U_INVALID_CHAR_FOUND;
1585 uprv_free(string);
1586 error(line, "Encountered invalid binary string");
1587 return NULL;
1588 }
1589 }
1590 else
1591 {
1592 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1593 warning(startline, "Encountered empty binary tag");
1594 }
1595 uprv_free(string);
1596
1597 return result;
1598 }
1599
1600 static struct SResource *
parseInteger(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1601 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1602 {
1603 struct SResource *result = NULL;
1604 int32_t value;
1605 char *string;
1606 char *stopstring;
1607 uint32_t len;
1608
1609 string = getInvariantString(state, NULL, NULL, status);
1610
1611 if (string == NULL || U_FAILURE(*status))
1612 {
1613 return NULL;
1614 }
1615
1616 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1617
1618 if (U_FAILURE(*status))
1619 {
1620 uprv_free(string);
1621 return NULL;
1622 }
1623
1624 if(isVerbose()){
1625 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1626 }
1627
1628 if (uprv_strlen(string) <= 0)
1629 {
1630 warning(startline, "Encountered empty integer. Default value is 0.");
1631 }
1632
1633 /* Allow integer support for hexdecimal, octal digit and decimal*/
1634 /* and handle illegal char in the integer*/
1635 value = uprv_strtoul(string, &stopstring, 0);
1636 len=(uint32_t)(stopstring-string);
1637 if(len==uprv_strlen(string))
1638 {
1639 result = int_open(state->bundle, tag, value, comment, status);
1640 }
1641 else
1642 {
1643 *status=U_INVALID_CHAR_FOUND;
1644 }
1645 uprv_free(string);
1646
1647 return result;
1648 }
1649
1650 static struct SResource *
parseImport(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1651 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1652 {
1653 struct SResource *result;
1654 FileStream *file;
1655 int32_t len;
1656 uint8_t *data;
1657 char *filename;
1658 uint32_t line;
1659 char *fullname = NULL;
1660 filename = getInvariantString(state, &line, NULL, status);
1661
1662 if (U_FAILURE(*status))
1663 {
1664 return NULL;
1665 }
1666
1667 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1668
1669 if (U_FAILURE(*status))
1670 {
1671 uprv_free(filename);
1672 return NULL;
1673 }
1674
1675 if(isVerbose()){
1676 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1677 }
1678
1679 /* Open the input file for reading */
1680 if (state->inputdir == NULL)
1681 {
1682 #if 1
1683 /*
1684 * Always save file file name, even if there's
1685 * no input directory specified. MIGHT BREAK SOMETHING
1686 */
1687 int32_t filenameLength = uprv_strlen(filename);
1688
1689 fullname = (char *) uprv_malloc(filenameLength + 1);
1690 uprv_strcpy(fullname, filename);
1691 #endif
1692
1693 file = T_FileStream_open(filename, "rb");
1694 }
1695 else
1696 {
1697
1698 int32_t count = (int32_t)uprv_strlen(filename);
1699
1700 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1701 {
1702 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1703
1704 /* test for NULL */
1705 if(fullname == NULL)
1706 {
1707 *status = U_MEMORY_ALLOCATION_ERROR;
1708 return NULL;
1709 }
1710
1711 uprv_strcpy(fullname, state->inputdir);
1712
1713 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1714 fullname[state->inputdirLength + 1] = '\0';
1715
1716 uprv_strcat(fullname, filename);
1717 }
1718 else
1719 {
1720 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1721
1722 /* test for NULL */
1723 if(fullname == NULL)
1724 {
1725 *status = U_MEMORY_ALLOCATION_ERROR;
1726 return NULL;
1727 }
1728
1729 uprv_strcpy(fullname, state->inputdir);
1730 uprv_strcat(fullname, filename);
1731 }
1732
1733 file = T_FileStream_open(fullname, "rb");
1734
1735 }
1736
1737 if (file == NULL)
1738 {
1739 error(line, "couldn't open input file %s", filename);
1740 *status = U_FILE_ACCESS_ERROR;
1741 return NULL;
1742 }
1743
1744 len = T_FileStream_size(file);
1745 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1746 /* test for NULL */
1747 if(data == NULL)
1748 {
1749 *status = U_MEMORY_ALLOCATION_ERROR;
1750 T_FileStream_close (file);
1751 return NULL;
1752 }
1753
1754 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1755 T_FileStream_close (file);
1756
1757 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1758
1759 uprv_free(data);
1760 uprv_free(filename);
1761 uprv_free(fullname);
1762
1763 return result;
1764 }
1765
1766 static struct SResource *
parseInclude(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1767 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1768 {
1769 struct SResource *result;
1770 int32_t len=0;
1771 char *filename;
1772 uint32_t line;
1773 UChar *pTarget = NULL;
1774
1775 UCHARBUF *ucbuf;
1776 char *fullname = NULL;
1777 int32_t count = 0;
1778 const char* cp = NULL;
1779 const UChar* uBuffer = NULL;
1780
1781 filename = getInvariantString(state, &line, NULL, status);
1782 count = (int32_t)uprv_strlen(filename);
1783
1784 if (U_FAILURE(*status))
1785 {
1786 return NULL;
1787 }
1788
1789 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1790
1791 if (U_FAILURE(*status))
1792 {
1793 uprv_free(filename);
1794 return NULL;
1795 }
1796
1797 if(isVerbose()){
1798 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1799 }
1800
1801 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1802 /* test for NULL */
1803 if(fullname == NULL)
1804 {
1805 *status = U_MEMORY_ALLOCATION_ERROR;
1806 uprv_free(filename);
1807 return NULL;
1808 }
1809
1810 if(state->inputdir!=NULL){
1811 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1812 {
1813
1814 uprv_strcpy(fullname, state->inputdir);
1815
1816 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1817 fullname[state->inputdirLength + 1] = '\0';
1818
1819 uprv_strcat(fullname, filename);
1820 }
1821 else
1822 {
1823 uprv_strcpy(fullname, state->inputdir);
1824 uprv_strcat(fullname, filename);
1825 }
1826 }else{
1827 uprv_strcpy(fullname,filename);
1828 }
1829
1830 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1831
1832 if (U_FAILURE(*status)) {
1833 error(line, "couldn't open input file %s\n", filename);
1834 return NULL;
1835 }
1836
1837 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1838 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1839
1840 ucbuf_close(ucbuf);
1841
1842 uprv_free(pTarget);
1843
1844 uprv_free(filename);
1845 uprv_free(fullname);
1846
1847 return result;
1848 }
1849
1850
1851
1852
1853
/*
 * UChar versions of the resource type keywords that may follow a ':' in the
 * source text. Each declaration gives the variable name, the keyword text
 * and the keyword length; initParser() fills them in with the matching
 * U_STRING_INIT calls. k_type_bin and k_type_int are short aliases that
 * parseResourceType() maps to the binary and integer types.
 */
U_STRING_DECL(k_type_string, "string", 6);
U_STRING_DECL(k_type_binary, "binary", 6);
U_STRING_DECL(k_type_bin, "bin", 3);
U_STRING_DECL(k_type_table, "table", 5);
U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
U_STRING_DECL(k_type_int, "int", 3);
U_STRING_DECL(k_type_integer, "integer", 7);
U_STRING_DECL(k_type_array, "array", 5);
U_STRING_DECL(k_type_alias, "alias", 5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);

/* Various non-standard processing plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1872
/*
 * Resource types known to the parser.
 *
 * The values are used directly as indexes into gResourceTypes[], so the
 * order here must match the order of the entries in that table.
 * RESTYPE_UNKNOWN (the first value) means "no explicit type given";
 * RESTYPE_RESERVED (the last value) is the end marker used when searching
 * the table by name.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED
} EResourceType;
1892
/*
 * Per-type parser table, indexed by EResourceType (the entries must stay in
 * the same order as the enum values).
 *
 * parseResourceType() compares the type keyword from the source text against
 * nameUChars; parseResource() calls parseFunction for the selected type and
 * reports an internal error when it is NULL.
 */
static struct {
    const char *nameChars;   /* only used for debugging */
    const UChar *nameUChars;
    ParseResourceFunction *parseFunction;
} gResourceTypes[] = {
    {"Unknown", NULL, NULL},
    {"string", k_type_string, parseString},
    {"binary", k_type_binary, parseBinary},
    {"table", k_type_table, parseTable},
    {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
    {"integer", k_type_integer, parseInteger},
    {"array", k_type_array, parseArray},
    {"alias", k_type_alias, parseAlias},
    {"intvector", k_type_intvector, parseIntVector},
    {"import", k_type_import, parseImport},
    {"include", k_type_include, parseInclude},
    {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
    {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
    {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
    {"process(dependency)", k_type_plugin_dependency, parseDependency},
    {"reserved", NULL, NULL}
};
1915
/*
 * Initializes the UChar type-keyword strings declared with U_STRING_DECL.
 * Each U_STRING_INIT call repeats the text and length given in the matching
 * declaration. parseResourceType() compares tokens against these strings,
 * so this presumably has to run before any parsing starts.
 */
void initParser()
{
    U_STRING_INIT(k_type_string, "string", 6);
    U_STRING_INIT(k_type_binary, "binary", 6);
    U_STRING_INIT(k_type_bin, "bin", 3);
    U_STRING_INIT(k_type_table, "table", 5);
    U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
    U_STRING_INIT(k_type_int, "int", 3);
    U_STRING_INIT(k_type_integer, "integer", 7);
    U_STRING_INIT(k_type_array, "array", 5);
    U_STRING_INIT(k_type_alias, "alias", 5);
    U_STRING_INIT(k_type_intvector, "intvector", 9);
    U_STRING_INIT(k_type_import, "import", 6);
    U_STRING_INIT(k_type_include, "include", 7);

    /* Non-standard processing plugins. */
    U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
    U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
    U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
    U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
}
1936
isTable(enum EResourceType type)1937 static inline UBool isTable(enum EResourceType type) {
1938 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1939 }
1940
1941 static enum EResourceType
parseResourceType(ParseState * state,UErrorCode * status)1942 parseResourceType(ParseState* state, UErrorCode *status)
1943 {
1944 struct UString *tokenValue;
1945 struct UString comment;
1946 enum EResourceType result = RESTYPE_UNKNOWN;
1947 uint32_t line=0;
1948 ustr_init(&comment);
1949 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1950
1951 if (U_FAILURE(*status))
1952 {
1953 return RESTYPE_UNKNOWN;
1954 }
1955
1956 *status = U_ZERO_ERROR;
1957
1958 /* Search for normal types */
1959 result=RESTYPE_UNKNOWN;
1960 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1961 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1962 break;
1963 }
1964 }
1965 /* Now search for the aliases */
1966 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1967 result = RESTYPE_INTEGER;
1968 }
1969 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1970 result = RESTYPE_BINARY;
1971 }
1972 else if (result == RESTYPE_RESERVED) {
1973 char tokenBuffer[1024];
1974 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1975 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1976 *status = U_INVALID_FORMAT_ERROR;
1977 error(line, "unknown resource type '%s'", tokenBuffer);
1978 }
1979
1980 return result;
1981 }
1982
1983 /* parse a non-top-level resource */
1984 static struct SResource *
parseResource(ParseState * state,char * tag,const struct UString * comment,UErrorCode * status)1985 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1986 {
1987 enum ETokenType token;
1988 enum EResourceType resType = RESTYPE_UNKNOWN;
1989 ParseResourceFunction *parseFunction = NULL;
1990 struct UString *tokenValue;
1991 uint32_t startline;
1992 uint32_t line;
1993
1994
1995 token = getToken(state, &tokenValue, NULL, &startline, status);
1996
1997 if(isVerbose()){
1998 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1999 }
2000
2001 /* name . [ ':' type ] '{' resource '}' */
2002 /* This function parses from the colon onwards. If the colon is present, parse the
2003 type then try to parse a resource of that type. If there is no explicit type,
2004 work it out using the lookahead tokens. */
2005 switch (token)
2006 {
2007 case TOK_EOF:
2008 *status = U_INVALID_FORMAT_ERROR;
2009 error(startline, "Unexpected EOF encountered");
2010 return NULL;
2011
2012 case TOK_ERROR:
2013 *status = U_INVALID_FORMAT_ERROR;
2014 return NULL;
2015
2016 case TOK_COLON:
2017 resType = parseResourceType(state, status);
2018 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
2019
2020 if (U_FAILURE(*status))
2021 {
2022 return NULL;
2023 }
2024
2025 break;
2026
2027 case TOK_OPEN_BRACE:
2028 break;
2029
2030 default:
2031 *status = U_INVALID_FORMAT_ERROR;
2032 error(startline, "syntax error while reading a resource, expected '{' or ':'");
2033 return NULL;
2034 }
2035
2036
2037 if (resType == RESTYPE_UNKNOWN)
2038 {
2039 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
2040 We could have any of the following:
2041 { { => array (nested)
2042 { :/} => array
2043 { string , => string array
2044
2045 { string { => table
2046
2047 { string :/{ => table
2048 { string } => string
2049 */
2050
2051 token = peekToken(state, 0, NULL, &line, NULL,status);
2052
2053 if (U_FAILURE(*status))
2054 {
2055 return NULL;
2056 }
2057
2058 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
2059 {
2060 resType = RESTYPE_ARRAY;
2061 }
2062 else if (token == TOK_STRING)
2063 {
2064 token = peekToken(state, 1, NULL, &line, NULL, status);
2065
2066 if (U_FAILURE(*status))
2067 {
2068 return NULL;
2069 }
2070
2071 switch (token)
2072 {
2073 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
2074 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
2075 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
2076 case TOK_COLON: resType = RESTYPE_TABLE; break;
2077 default:
2078 *status = U_INVALID_FORMAT_ERROR;
2079 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2080 return NULL;
2081 }
2082 }
2083 else
2084 {
2085 *status = U_INVALID_FORMAT_ERROR;
2086 error(line, "Unexpected token after '{'");
2087 return NULL;
2088 }
2089
2090 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2091 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
2092 *status = U_INVALID_FORMAT_ERROR;
2093 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2094 return NULL;
2095 }
2096
2097
2098 /* We should now know what we need to parse next, so call the appropriate parser
2099 function and return. */
2100 parseFunction = gResourceTypes[resType].parseFunction;
2101 if (parseFunction != NULL) {
2102 return parseFunction(state, tag, startline, comment, status);
2103 }
2104 else {
2105 *status = U_INTERNAL_PROGRAM_ERROR;
2106 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2107 }
2108
2109 return NULL;
2110 }
2111
2112 /* parse the top-level resource */
2113 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,const char * filename,UBool makeBinaryCollation,UBool omitCollationRules,UErrorCode * status)2114 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
2115 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2116 {
2117 struct UString *tokenValue;
2118 struct UString comment;
2119 uint32_t line;
2120 enum EResourceType bundleType;
2121 enum ETokenType token;
2122 ParseState state;
2123 uint32_t i;
2124
2125
2126 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2127 {
2128 ustr_init(&state.lookahead[i].value);
2129 ustr_init(&state.lookahead[i].comment);
2130 }
2131
2132 initLookahead(&state, buf, status);
2133
2134 state.inputdir = inputDir;
2135 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2136 state.outputdir = outputDir;
2137 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2138 state.filename = filename;
2139 state.makeBinaryCollation = makeBinaryCollation;
2140 state.omitCollationRules = omitCollationRules;
2141
2142 ustr_init(&comment);
2143 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2144
2145 state.bundle = bundle_open(&comment, FALSE, status);
2146
2147 if (state.bundle == NULL || U_FAILURE(*status))
2148 {
2149 return NULL;
2150 }
2151
2152
2153 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2154
2155 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2156 token = getToken(&state, NULL, NULL, &line, status);
2157 if(token==TOK_COLON) {
2158 *status=U_ZERO_ERROR;
2159 bundleType=parseResourceType(&state, status);
2160
2161 if(isTable(bundleType))
2162 {
2163 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2164 }
2165 else
2166 {
2167 *status=U_PARSE_ERROR;
2168 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2169 }
2170 }
2171 else
2172 {
2173 /* not a colon */
2174 if(token==TOK_OPEN_BRACE)
2175 {
2176 *status=U_ZERO_ERROR;
2177 bundleType=RESTYPE_TABLE;
2178 }
2179 else
2180 {
2181 /* neither colon nor open brace */
2182 *status=U_PARSE_ERROR;
2183 bundleType=RESTYPE_UNKNOWN;
2184 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2185 }
2186 }
2187
2188 if (U_FAILURE(*status))
2189 {
2190 bundle_close(state.bundle, status);
2191 return NULL;
2192 }
2193
2194 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2195 /*
2196 * Parse a top-level table with the table(nofallback) declaration.
2197 * This is the same as a regular table, but also sets the
2198 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2199 */
2200 state.bundle->noFallback=TRUE;
2201 }
2202 /* top-level tables need not handle special table names like "collations" */
2203 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2204 if(dependencyArray!=NULL){
2205 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2206 dependencyArray = NULL;
2207 }
2208 if (U_FAILURE(*status))
2209 {
2210 bundle_close(state.bundle, status);
2211 res_close(dependencyArray);
2212 return NULL;
2213 }
2214
2215 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2216 {
2217 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2218 if(isStrict()){
2219 *status = U_INVALID_FORMAT_ERROR;
2220 return NULL;
2221 }
2222 }
2223
2224 cleanupLookahead(&state);
2225 ustr_deinit(&comment);
2226 return state.bundle;
2227 }
2228