1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 # define UNISTR_FROM_CHAR_EXPLICIT explicit
24 #endif
25
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 # define UNISTR_FROM_STRING_EXPLICIT explicit
29 #endif
30
31 #include "parse.h"
32 #include "errmsg.h"
33 #include "uhash.h"
34 #include "cmemory.h"
35 #include "cstring.h"
36 #include "uinvchar.h"
37 #include "read.h"
38 #include "ustr.h"
39 #include "reslist.h"
40 #include "rbt_pars.h"
41 #include "genrb.h"
42 #include "unicode/ustring.h"
43 #include "unicode/uscript.h"
44 #include "unicode/utf16.h"
45 #include "unicode/putil.h"
46 #include "collationbuilder.h"
47 #include "collationdata.h"
48 #include "collationdatareader.h"
49 #include "collationdatawriter.h"
50 #include "collationfastlatinbuilder.h"
51 #include "collationinfo.h"
52 #include "collationroot.h"
53 #include "collationruleparser.h"
54 #include "collationtailoring.h"
55 #include <stdio.h>
56
57 /* Number of tokens to read ahead of the current stream position */
58 #define MAX_LOOKAHEAD 3
59
60 #define CR 0x000D
61 #define LF 0x000A
62 #define SPACE 0x0020
63 #define TAB 0x0009
64 #define ESCAPE 0x005C
65 #define HASH 0x0023
66 #define QUOTE 0x0027
67 #define ZERO 0x0030
68 #define STARTCOMMAND 0x005B
69 #define ENDCOMMAND 0x005D
70 #define OPENSQBRACKET 0x005B
71 #define CLOSESQBRACKET 0x005D
72
73 using icu::LocalPointer;
74 using icu::UnicodeString;
75
76 struct Lookahead
77 {
78 enum ETokenType type;
79 struct UString value;
80 struct UString comment;
81 uint32_t line;
82 };
83
84 /* keep in sync with token defines in read.h */
85 const char *tokenNames[TOK_TOKEN_COUNT] =
86 {
87 "string", /* A string token, such as "MonthNames" */
88 "'{'", /* An opening brace character */
89 "'}'", /* A closing brace character */
90 "','", /* A comma */
91 "':'", /* A colon */
92
93 "<end of file>", /* End of the file has been reached successfully */
94 "<end of line>"
95 };
96
97 /* Just to store "TRUE" */
98 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
99
100 typedef struct {
101 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
102 uint32_t lookaheadPosition;
103 UCHARBUF *buffer;
104 struct SRBRoot *bundle;
105 const char *inputdir;
106 uint32_t inputdirLength;
107 const char *outputdir;
108 uint32_t outputdirLength;
109 const char *filename;
110 UBool makeBinaryCollation;
111 UBool omitCollationRules;
112 } ParseState;
113
114 typedef struct SResource *
115 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
116
117 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
118
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
132 static void
initLookahead(ParseState * state,UCHARBUF * buf,UErrorCode * status)133 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
134 {
135 static uint32_t initTypeStrings = 0;
136 uint32_t i;
137
138 if (!initTypeStrings)
139 {
140 initTypeStrings = 1;
141 }
142
143 state->lookaheadPosition = 0;
144 state->buffer = buf;
145
146 resetLineNumber();
147
148 for (i = 0; i < MAX_LOOKAHEAD; i++)
149 {
150 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
151 if (U_FAILURE(*status))
152 {
153 return;
154 }
155 }
156
157 *status = U_ZERO_ERROR;
158 }
159
160 static void
cleanupLookahead(ParseState * state)161 cleanupLookahead(ParseState* state)
162 {
163 uint32_t i;
164 for (i = 0; i <= MAX_LOOKAHEAD; i++)
165 {
166 ustr_deinit(&state->lookahead[i].value);
167 ustr_deinit(&state->lookahead[i].comment);
168 }
169
170 }
171
172 static enum ETokenType
getToken(ParseState * state,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)173 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
174 {
175 enum ETokenType result;
176 uint32_t i;
177
178 result = state->lookahead[state->lookaheadPosition].type;
179
180 if (tokenValue != NULL)
181 {
182 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
183 }
184
185 if (linenumber != NULL)
186 {
187 *linenumber = state->lookahead[state->lookaheadPosition].line;
188 }
189
190 if (comment != NULL)
191 {
192 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
193 }
194
195 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
196 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
197 ustr_setlen(&state->lookahead[i].comment, 0, status);
198 ustr_setlen(&state->lookahead[i].value, 0, status);
199 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
200
201 /* printf("getToken, returning %s\n", tokenNames[result]); */
202
203 return result;
204 }
205
206 static enum ETokenType
peekToken(ParseState * state,uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)207 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
208 {
209 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
210
211 if (U_FAILURE(*status))
212 {
213 return TOK_ERROR;
214 }
215
216 if (lookaheadCount >= MAX_LOOKAHEAD)
217 {
218 *status = U_INTERNAL_PROGRAM_ERROR;
219 return TOK_ERROR;
220 }
221
222 if (tokenValue != NULL)
223 {
224 *tokenValue = &state->lookahead[i].value;
225 }
226
227 if (linenumber != NULL)
228 {
229 *linenumber = state->lookahead[i].line;
230 }
231
232 if(comment != NULL){
233 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
234 }
235
236 return state->lookahead[i].type;
237 }
238
239 static void
expect(ParseState * state,enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)240 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
241 {
242 uint32_t line;
243
244 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
245
246 if (linenumber != NULL)
247 {
248 *linenumber = line;
249 }
250
251 if (U_FAILURE(*status))
252 {
253 return;
254 }
255
256 if (token != expectedToken)
257 {
258 *status = U_INVALID_FORMAT_ERROR;
259 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
260 }
261 else
262 {
263 *status = U_ZERO_ERROR;
264 }
265 }
266
getInvariantString(ParseState * state,uint32_t * line,struct UString * comment,UErrorCode * status)267 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
268 {
269 struct UString *tokenValue;
270 char *result;
271 uint32_t count;
272
273 expect(state, TOK_STRING, &tokenValue, comment, line, status);
274
275 if (U_FAILURE(*status))
276 {
277 return NULL;
278 }
279
280 count = u_strlen(tokenValue->fChars);
281 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
282 *status = U_INVALID_FORMAT_ERROR;
283 error(*line, "invariant characters required for table keys, binary data, etc.");
284 return NULL;
285 }
286
287 result = static_cast<char *>(uprv_malloc(count+1));
288
289 if (result == NULL)
290 {
291 *status = U_MEMORY_ALLOCATION_ERROR;
292 return NULL;
293 }
294
295 u_UCharsToChars(tokenValue->fChars, result, count+1);
296 return result;
297 }
298
299 static struct SResource *
parseUCARules(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)300 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
301 {
302 struct SResource *result = NULL;
303 struct UString *tokenValue;
304 FileStream *file = NULL;
305 char filename[256] = { '\0' };
306 char cs[128] = { '\0' };
307 uint32_t line;
308 UBool quoted = FALSE;
309 UCHARBUF *ucbuf=NULL;
310 UChar32 c = 0;
311 const char* cp = NULL;
312 UChar *pTarget = NULL;
313 UChar *target = NULL;
314 UChar *targetLimit = NULL;
315 int32_t size = 0;
316
317 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
318
319 if(isVerbose()){
320 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
321 }
322
323 if (U_FAILURE(*status))
324 {
325 return NULL;
326 }
327 /* make the filename including the directory */
328 if (state->inputdir != NULL)
329 {
330 uprv_strcat(filename, state->inputdir);
331
332 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
333 {
334 uprv_strcat(filename, U_FILE_SEP_STRING);
335 }
336 }
337
338 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
339
340 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
341
342 if (U_FAILURE(*status))
343 {
344 return NULL;
345 }
346 uprv_strcat(filename, cs);
347
348 if(state->omitCollationRules) {
349 return res_none();
350 }
351
352 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
353
354 if (U_FAILURE(*status)) {
355 error(line, "An error occured while opening the input file %s\n", filename);
356 return NULL;
357 }
358
359 /* We allocate more space than actually required
360 * since the actual size needed for storing UChars
361 * is not known in UTF-8 byte stream
362 */
363 size = ucbuf_size(ucbuf) + 1;
364 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
365 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
366 target = pTarget;
367 targetLimit = pTarget+size;
368
369 /* read the rules into the buffer */
370 while (target < targetLimit)
371 {
372 c = ucbuf_getc(ucbuf, status);
373 if(c == QUOTE) {
374 quoted = (UBool)!quoted;
375 }
376 /* weiv (06/26/2002): adding the following:
377 * - preserving spaces in commands [...]
378 * - # comments until the end of line
379 */
380 if (c == STARTCOMMAND && !quoted)
381 {
382 /* preserve commands
383 * closing bracket will be handled by the
384 * append at the end of the loop
385 */
386 while(c != ENDCOMMAND) {
387 U_APPEND_CHAR32_ONLY(c, target);
388 c = ucbuf_getc(ucbuf, status);
389 }
390 }
391 else if (c == HASH && !quoted) {
392 /* skip comments */
393 while(c != CR && c != LF) {
394 c = ucbuf_getc(ucbuf, status);
395 }
396 continue;
397 }
398 else if (c == ESCAPE)
399 {
400 c = unescape(ucbuf, status);
401
402 if (c == (UChar32)U_ERR)
403 {
404 uprv_free(pTarget);
405 T_FileStream_close(file);
406 return NULL;
407 }
408 }
409 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
410 {
411 /* ignore spaces carriage returns
412 * and line feed unless in the form \uXXXX
413 */
414 continue;
415 }
416
417 /* Append UChar * after dissembling if c > 0xffff*/
418 if (c != (UChar32)U_EOF)
419 {
420 U_APPEND_CHAR32_ONLY(c, target);
421 }
422 else
423 {
424 break;
425 }
426 }
427
428 /* terminate the string */
429 if(target < targetLimit){
430 *target = 0x0000;
431 }
432
433 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
434
435
436 ucbuf_close(ucbuf);
437 uprv_free(pTarget);
438 T_FileStream_close(file);
439
440 return result;
441 }
442
443 static struct SResource *
parseTransliterator(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)444 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
445 {
446 struct SResource *result = NULL;
447 struct UString *tokenValue;
448 FileStream *file = NULL;
449 char filename[256] = { '\0' };
450 char cs[128] = { '\0' };
451 uint32_t line;
452 UCHARBUF *ucbuf=NULL;
453 const char* cp = NULL;
454 UChar *pTarget = NULL;
455 const UChar *pSource = NULL;
456 int32_t size = 0;
457
458 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
459
460 if(isVerbose()){
461 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
462 }
463
464 if (U_FAILURE(*status))
465 {
466 return NULL;
467 }
468 /* make the filename including the directory */
469 if (state->inputdir != NULL)
470 {
471 uprv_strcat(filename, state->inputdir);
472
473 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
474 {
475 uprv_strcat(filename, U_FILE_SEP_STRING);
476 }
477 }
478
479 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
480
481 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
482
483 if (U_FAILURE(*status))
484 {
485 return NULL;
486 }
487 uprv_strcat(filename, cs);
488
489
490 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
491
492 if (U_FAILURE(*status)) {
493 error(line, "An error occured while opening the input file %s\n", filename);
494 return NULL;
495 }
496
497 /* We allocate more space than actually required
498 * since the actual size needed for storing UChars
499 * is not known in UTF-8 byte stream
500 */
501 pSource = ucbuf_getBuffer(ucbuf, &size, status);
502 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
503 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
504
505 #if !UCONFIG_NO_TRANSLITERATION
506 size = utrans_stripRules(pSource, size, pTarget, status);
507 #else
508 size = 0;
509 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
510 #endif
511 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
512
513 ucbuf_close(ucbuf);
514 uprv_free(pTarget);
515 T_FileStream_close(file);
516
517 return result;
518 }
519 static struct SResource* dependencyArray = NULL;
520
521 static struct SResource *
parseDependency(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)522 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
523 {
524 struct SResource *result = NULL;
525 struct SResource *elem = NULL;
526 struct UString *tokenValue;
527 uint32_t line;
528 char filename[256] = { '\0' };
529 char cs[128] = { '\0' };
530
531 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
532
533 if(isVerbose()){
534 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
535 }
536
537 if (U_FAILURE(*status))
538 {
539 return NULL;
540 }
541 /* make the filename including the directory */
542 if (state->outputdir != NULL)
543 {
544 uprv_strcat(filename, state->outputdir);
545
546 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
547 {
548 uprv_strcat(filename, U_FILE_SEP_STRING);
549 }
550 }
551
552 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
553
554 if (U_FAILURE(*status))
555 {
556 return NULL;
557 }
558 uprv_strcat(filename, cs);
559 if(!T_FileStream_file_exists(filename)){
560 if(isStrict()){
561 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
562 }else{
563 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
564 }
565 }
566 if(dependencyArray==NULL){
567 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
568 }
569 if(tag!=NULL){
570 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
571 }
572 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
573
574 array_add(dependencyArray, elem, status);
575
576 if (U_FAILURE(*status))
577 {
578 return NULL;
579 }
580 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
581 return result;
582 }
583 static struct SResource *
parseString(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)584 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
585 {
586 struct UString *tokenValue;
587 struct SResource *result = NULL;
588
589 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
590 {
591 return parseUCARules(tag, startline, status);
592 }*/
593 if(isVerbose()){
594 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
595 }
596 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
597
598 if (U_SUCCESS(*status))
599 {
600 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601 doesn't survive expect either) */
602
603 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
604 if(U_SUCCESS(*status) && result) {
605 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
606
607 if (U_FAILURE(*status))
608 {
609 res_close(result);
610 return NULL;
611 }
612 }
613 }
614
615 return result;
616 }
617
618 static struct SResource *
parseAlias(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)619 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
620 {
621 struct UString *tokenValue;
622 struct SResource *result = NULL;
623
624 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
625
626 if(isVerbose()){
627 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
628 }
629
630 if (U_SUCCESS(*status))
631 {
632 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633 doesn't survive expect either) */
634
635 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
636
637 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
638
639 if (U_FAILURE(*status))
640 {
641 res_close(result);
642 return NULL;
643 }
644 }
645
646 return result;
647 }
648
649 #if !UCONFIG_NO_COLLATION
650
651 namespace {
652
resLookup(struct SResource * res,const char * key)653 static struct SResource* resLookup(struct SResource* res, const char* key){
654 struct SResource *current = NULL;
655 struct SResTable *list;
656 if (res == res_none()) {
657 return NULL;
658 }
659
660 list = &(res->u.fTable);
661
662 current = list->fFirst;
663 while (current != NULL) {
664 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
665 return current;
666 }
667 current = current->fNext;
668 }
669 return NULL;
670 }
671
672 class GenrbImporter : public icu::CollationRuleParser::Importer {
673 public:
GenrbImporter(const char * in,const char * out)674 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
675 virtual ~GenrbImporter();
676 virtual const UnicodeString *getRules(
677 const char *localeID, const char *collationType,
678 const char *&errorReason, UErrorCode &errorCode);
679
680 private:
681 const char *inputDir;
682 const char *outputDir;
683 UnicodeString rules;
684 };
685
~GenrbImporter()686 GenrbImporter::~GenrbImporter() {}
687
688 const UnicodeString *
getRules(const char * localeID,const char * collationType,const char * &,UErrorCode & errorCode)689 GenrbImporter::getRules(
690 const char *localeID, const char *collationType,
691 const char *& /*errorReason*/, UErrorCode &errorCode) {
692 struct SRBRoot *data = NULL;
693 UCHARBUF *ucbuf = NULL;
694 int localeLength = strlen(localeID);
695 char* filename = (char*)uprv_malloc(localeLength+5);
696 char *inputDirBuf = NULL;
697 char *openFileName = NULL;
698 const char* cp = "";
699 int32_t i = 0;
700 int32_t dirlen = 0;
701 int32_t filelen = 0;
702 struct SResource* root;
703 struct SResource* collations;
704 struct SResource* collation;
705 struct SResource* sequence;
706
707 memcpy(filename, localeID, localeLength);
708 for(i = 0; i < localeLength; i++){
709 if(filename[i] == '-'){
710 filename[i] = '_';
711 }
712 }
713 filename[localeLength] = '.';
714 filename[localeLength+1] = 't';
715 filename[localeLength+2] = 'x';
716 filename[localeLength+3] = 't';
717 filename[localeLength+4] = 0;
718
719
720 if (U_FAILURE(errorCode)) {
721 return NULL;
722 }
723 if(filename==NULL){
724 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
725 return NULL;
726 }else{
727 filelen = (int32_t)uprv_strlen(filename);
728 }
729 if(inputDir == NULL) {
730 const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
731 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
732 openFileName[0] = '\0';
733 if (filenameBegin != NULL) {
734 /*
735 * When a filename ../../../data/root.txt is specified,
736 * we presume that the input directory is ../../../data
737 * This is very important when the resource file includes
738 * another file, like UCARules.txt or thaidict.brk.
739 */
740 int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
741 inputDirBuf = (char *)uprv_malloc(filenameSize);
742
743 /* test for NULL */
744 if(inputDirBuf == NULL) {
745 errorCode = U_MEMORY_ALLOCATION_ERROR;
746 goto finish;
747 }
748
749 uprv_strncpy(inputDirBuf, filename, filenameSize);
750 inputDirBuf[filenameSize - 1] = 0;
751 inputDir = inputDirBuf;
752 dirlen = (int32_t)uprv_strlen(inputDir);
753 }
754 }else{
755 dirlen = (int32_t)uprv_strlen(inputDir);
756
757 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
758 openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
759
760 /* test for NULL */
761 if(openFileName == NULL) {
762 errorCode = U_MEMORY_ALLOCATION_ERROR;
763 goto finish;
764 }
765
766 openFileName[0] = '\0';
767 /*
768 * append the input dir to openFileName if the first char in
769 * filename is not file seperation char and the last char input directory is not '.'.
770 * This is to support :
771 * genrb -s. /home/icu/data
772 * genrb -s. icu/data
773 * The user cannot mix notations like
774 * genrb -s. /icu/data --- the absolute path specified. -s redundant
775 * user should use
776 * genrb -s. icu/data --- start from CWD and look in icu/data dir
777 */
778 if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
779 uprv_strcpy(openFileName, inputDir);
780 openFileName[dirlen] = U_FILE_SEP_CHAR;
781 }
782 openFileName[dirlen + 1] = '\0';
783 } else {
784 openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
785
786 /* test for NULL */
787 if(openFileName == NULL) {
788 errorCode = U_MEMORY_ALLOCATION_ERROR;
789 goto finish;
790 }
791
792 uprv_strcpy(openFileName, inputDir);
793
794 }
795 }
796 uprv_strcat(openFileName, filename);
797 /* printf("%s\n", openFileName); */
798 errorCode = U_ZERO_ERROR;
799 ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
800
801 if(errorCode == U_FILE_ACCESS_ERROR) {
802
803 fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
804 goto finish;
805 }
806 if (ucbuf == NULL || U_FAILURE(errorCode)) {
807 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(errorCode));
808 goto finish;
809 }
810
811 /* Parse the data into an SRBRoot */
812 data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
813
814 root = data->fRoot;
815 collations = resLookup(root, "collations");
816 if (collations != NULL) {
817 collation = resLookup(collations, collationType);
818 if (collation != NULL) {
819 sequence = resLookup(collation, "Sequence");
820 if (sequence != NULL) {
821 rules.setTo(FALSE, sequence->u.fString.fChars, sequence->u.fString.fLength);
822 }
823 }
824 }
825
826 finish:
827 if (inputDirBuf != NULL) {
828 uprv_free(inputDirBuf);
829 }
830
831 if (openFileName != NULL) {
832 uprv_free(openFileName);
833 }
834
835 if(ucbuf) {
836 ucbuf_close(ucbuf);
837 }
838
839 return &rules;
840 }
841
842 // Quick-and-dirty escaping function.
843 // Assumes that we are on an ASCII-based platform.
844 static void
escape(const UChar * s,char * buffer)845 escape(const UChar *s, char *buffer) {
846 int32_t length = u_strlen(s);
847 int32_t i = 0;
848 for (;;) {
849 UChar32 c;
850 U16_NEXT(s, i, length, c);
851 if (c == 0) {
852 *buffer = 0;
853 return;
854 } else if (0x20 <= c && c <= 0x7e) {
855 // printable ASCII
856 *buffer++ = (char)c; // assumes ASCII-based platform
857 } else {
858 buffer += sprintf(buffer, "\\u%04X", (int)c);
859 }
860 }
861 }
862
863 } // namespace
864
865 #endif // !UCONFIG_NO_COLLATION
866
867 static struct SResource *
addCollation(ParseState * state,struct SResource * result,const char * collationType,uint32_t startline,UErrorCode * status)868 addCollation(ParseState* state, struct SResource *result, const char *collationType,
869 uint32_t startline, UErrorCode *status)
870 {
871 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
872 struct SResource *member = NULL;
873 struct UString *tokenValue;
874 struct UString comment;
875 enum ETokenType token;
876 char subtag[1024];
877 UnicodeString rules;
878 UBool haveRules = FALSE;
879 UVersionInfo version;
880 uint32_t line;
881
882 /* '{' . (name resource)* '}' */
883 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
884
885 for (;;)
886 {
887 ustr_init(&comment);
888 token = getToken(state, &tokenValue, &comment, &line, status);
889
890 if (token == TOK_CLOSE_BRACE)
891 {
892 break;
893 }
894
895 if (token != TOK_STRING)
896 {
897 res_close(result);
898 *status = U_INVALID_FORMAT_ERROR;
899
900 if (token == TOK_EOF)
901 {
902 error(startline, "unterminated table");
903 }
904 else
905 {
906 error(line, "Unexpected token %s", tokenNames[token]);
907 }
908
909 return NULL;
910 }
911
912 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
913
914 if (U_FAILURE(*status))
915 {
916 res_close(result);
917 return NULL;
918 }
919
920 member = parseResource(state, subtag, NULL, status);
921
922 if (U_FAILURE(*status))
923 {
924 res_close(result);
925 return NULL;
926 }
927 if (result == NULL)
928 {
929 // Ignore the parsed resources, continue parsing.
930 }
931 else if (uprv_strcmp(subtag, "Version") == 0)
932 {
933 char ver[40];
934 int32_t length = member->u.fString.fLength;
935
936 if (length >= (int32_t) sizeof(ver))
937 {
938 length = (int32_t) sizeof(ver) - 1;
939 }
940
941 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
942 u_versionFromString(version, ver);
943
944 table_add(result, member, line, status);
945 member = NULL;
946 }
947 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
948 {
949 /* discard duplicate %%CollationBin if any*/
950 }
951 else if (uprv_strcmp(subtag, "Sequence") == 0)
952 {
953 rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
954 haveRules = TRUE;
955 // Defer building the collator until we have seen
956 // all sub-elements of the collation table, including the Version.
957 /* in order to achieve smaller data files, we can direct genrb */
958 /* to omit collation rules */
959 if(!state->omitCollationRules) {
960 table_add(result, member, line, status);
961 member = NULL;
962 }
963 }
964 else // Just copy non-special items.
965 {
966 table_add(result, member, line, status);
967 member = NULL;
968 }
969 res_close(member); // TODO: use LocalPointer
970 if (U_FAILURE(*status))
971 {
972 res_close(result);
973 return NULL;
974 }
975 }
976
977 if (!haveRules) { return result; }
978
979 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
980 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
981 (void)collationType;
982 #else
983 if(!state->makeBinaryCollation) {
984 if(isVerbose()) {
985 printf("Not building %s~%s collation binary\n", state->filename, collationType);
986 }
987 return result;
988 }
989 UErrorCode intStatus = U_ZERO_ERROR;
990 UParseError parseError;
991 uprv_memset(&parseError, 0, sizeof(parseError));
992 GenrbImporter importer(state->inputdir, state->outputdir);
993 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
994 if(U_FAILURE(intStatus)) {
995 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
996 res_close(result);
997 return NULL; // TODO: use LocalUResourceBundlePointer for result
998 }
999 icu::CollationBuilder builder(base, intStatus);
1000 if(uprv_strncmp(collationType, "search", 6) == 0) {
1001 builder.disableFastLatin(); // build fast-Latin table unless search collator
1002 }
1003 LocalPointer<icu::CollationTailoring> t(
1004 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
1005 if(U_FAILURE(intStatus)) {
1006 const char *reason = builder.getErrorReason();
1007 if(reason == NULL) { reason = ""; }
1008 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
1009 state->filename, collationType,
1010 (long)parseError.offset, u_errorName(intStatus), reason);
1011 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1012 // Print pre- and post-context.
1013 char preBuffer[100], postBuffer[100];
1014 escape(parseError.preContext, preBuffer);
1015 escape(parseError.postContext, postBuffer);
1016 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
1017 }
1018 if(isStrict()) {
1019 *status = intStatus;
1020 res_close(result);
1021 return NULL;
1022 }
1023 }
1024 icu::LocalMemory<uint8_t> buffer;
1025 int32_t capacity = 100000;
1026 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
1027 if(dest == NULL) {
1028 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1029 (long)capacity);
1030 *status = U_MEMORY_ALLOCATION_ERROR;
1031 res_close(result);
1032 return NULL;
1033 }
1034 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
1035 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
1036 *t, *t->settings, indexes, dest, capacity, intStatus);
1037 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
1038 intStatus = U_ZERO_ERROR;
1039 capacity = totalSize;
1040 dest = buffer.allocateInsteadAndCopy(capacity);
1041 if(dest == NULL) {
1042 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1043 (long)capacity);
1044 *status = U_MEMORY_ALLOCATION_ERROR;
1045 res_close(result);
1046 return NULL;
1047 }
1048 totalSize = icu::CollationDataWriter::writeTailoring(
1049 *t, *t->settings, indexes, dest, capacity, intStatus);
1050 }
1051 if(U_FAILURE(intStatus)) {
1052 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1053 u_errorName(intStatus));
1054 res_close(result);
1055 return NULL;
1056 }
1057 if(isVerbose()) {
1058 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1059 icu::CollationInfo::printSizes(totalSize, indexes);
1060 }
1061 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1062 table_add(result, collationBin, line, status);
1063 if (U_FAILURE(*status)) {
1064 res_close(result);
1065 return NULL;
1066 }
1067 #endif
1068 return result;
1069 }
1070
1071 static UBool
keepCollationType(const char * type)1072 keepCollationType(const char *type) {
1073 // BEGIN android-added
1074 if (uprv_strcmp(type, "big5han") == 0 && !gIncludeBig5HanColl) { return FALSE; }
1075 if (uprv_strcmp(type, "gb2312han") == 0 && !gIncludeGb2312HanColl) { return FALSE; }
1076 if (uprv_strcmp(type, "zhuyin") == 0 && !gIncludeZhuyinHanColl) { return FALSE; }
1077 // END android-added
1078 return gIncludeUnihanColl || uprv_strcmp(type, "unihan") != 0;
1079 }
1080
1081 static struct SResource *
parseCollationElements(ParseState * state,char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)1082 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1083 {
1084 struct SResource *result = NULL;
1085 struct SResource *member = NULL;
1086 struct SResource *collationRes = NULL;
1087 struct UString *tokenValue;
1088 struct UString comment;
1089 enum ETokenType token;
1090 char subtag[1024], typeKeyword[1024];
1091 uint32_t line;
1092
1093 result = table_open(state->bundle, tag, NULL, status);
1094
1095 if (result == NULL || U_FAILURE(*status))
1096 {
1097 return NULL;
1098 }
1099 if(isVerbose()){
1100 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1101 }
1102 if(!newCollation) {
1103 return addCollation(state, result, "(no type)", startline, status);
1104 }
1105 else {
1106 for(;;) {
1107 ustr_init(&comment);
1108 token = getToken(state, &tokenValue, &comment, &line, status);
1109
1110 if (token == TOK_CLOSE_BRACE)
1111 {
1112 return result;
1113 }
1114
1115 if (token != TOK_STRING)
1116 {
1117 res_close(result);
1118 *status = U_INVALID_FORMAT_ERROR;
1119
1120 if (token == TOK_EOF)
1121 {
1122 error(startline, "unterminated table");
1123 }
1124 else
1125 {
1126 error(line, "Unexpected token %s", tokenNames[token]);
1127 }
1128
1129 return NULL;
1130 }
1131
1132 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1133
1134 if (U_FAILURE(*status))
1135 {
1136 res_close(result);
1137 return NULL;
1138 }
1139
1140 if (uprv_strcmp(subtag, "default") == 0)
1141 {
1142 member = parseResource(state, subtag, NULL, status);
1143
1144 if (U_FAILURE(*status))
1145 {
1146 res_close(result);
1147 return NULL;
1148 }
1149
1150 table_add(result, member, line, status);
1151 }
1152 else
1153 {
1154 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1155 /* this probably needs to be refactored or recursively use the parser */
1156 /* first we assume that our collation table won't have the explicit type */
1157 /* then, we cannot handle aliases */
1158 if(token == TOK_OPEN_BRACE) {
1159 token = getToken(state, &tokenValue, &comment, &line, status);
1160 if (keepCollationType(subtag)) {
1161 collationRes = table_open(state->bundle, subtag, NULL, status);
1162 } else {
1163 collationRes = NULL;
1164 }
1165 // need to parse the collation data regardless
1166 collationRes = addCollation(state, collationRes, subtag, startline, status);
1167 if (collationRes != NULL) {
1168 table_add(result, collationRes, startline, status);
1169 }
1170 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1171 /* we could have a table too */
1172 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1173 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1174 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1175 member = parseResource(state, subtag, NULL, status);
1176 if (U_FAILURE(*status))
1177 {
1178 res_close(result);
1179 return NULL;
1180 }
1181
1182 table_add(result, member, line, status);
1183 } else {
1184 res_close(result);
1185 *status = U_INVALID_FORMAT_ERROR;
1186 return NULL;
1187 }
1188 } else {
1189 res_close(result);
1190 *status = U_INVALID_FORMAT_ERROR;
1191 return NULL;
1192 }
1193 }
1194
1195 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1196
1197 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1198
1199 if (U_FAILURE(*status))
1200 {
1201 res_close(result);
1202 return NULL;
1203 }
1204 }
1205 }
1206 }
1207
1208 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1209 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1210 static struct SResource *
realParseTable(ParseState * state,struct SResource * table,char * tag,uint32_t startline,UErrorCode * status)1211 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1212 {
1213 struct SResource *member = NULL;
1214 struct UString *tokenValue=NULL;
1215 struct UString comment;
1216 enum ETokenType token;
1217 char subtag[1024];
1218 uint32_t line;
1219 UBool readToken = FALSE;
1220
1221 /* '{' . (name resource)* '}' */
1222
1223 if(isVerbose()){
1224 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1225 }
1226 for (;;)
1227 {
1228 ustr_init(&comment);
1229 token = getToken(state, &tokenValue, &comment, &line, status);
1230
1231 if (token == TOK_CLOSE_BRACE)
1232 {
1233 if (!readToken) {
1234 warning(startline, "Encountered empty table");
1235 }
1236 return table;
1237 }
1238
1239 if (token != TOK_STRING)
1240 {
1241 *status = U_INVALID_FORMAT_ERROR;
1242
1243 if (token == TOK_EOF)
1244 {
1245 error(startline, "unterminated table");
1246 }
1247 else
1248 {
1249 error(line, "unexpected token %s", tokenNames[token]);
1250 }
1251
1252 return NULL;
1253 }
1254
1255 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1256 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1257 } else {
1258 *status = U_INVALID_FORMAT_ERROR;
1259 error(line, "invariant characters required for table keys");
1260 return NULL;
1261 }
1262
1263 if (U_FAILURE(*status))
1264 {
1265 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1266 return NULL;
1267 }
1268
1269 member = parseResource(state, subtag, &comment, status);
1270
1271 if (member == NULL || U_FAILURE(*status))
1272 {
1273 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1274 return NULL;
1275 }
1276
1277 table_add(table, member, line, status);
1278
1279 if (U_FAILURE(*status))
1280 {
1281 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1282 return NULL;
1283 }
1284 readToken = TRUE;
1285 ustr_deinit(&comment);
1286 }
1287
1288 /* not reached */
1289 /* A compiler warning will appear if all paths don't contain a return statement. */
1290 /* *status = U_INTERNAL_PROGRAM_ERROR;
1291 return NULL;*/
1292 }
1293
1294 static struct SResource *
parseTable(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1295 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1296 {
1297 struct SResource *result;
1298
1299 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1300 {
1301 return parseCollationElements(state, tag, startline, FALSE, status);
1302 }
1303 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1304 {
1305 return parseCollationElements(state, tag, startline, TRUE, status);
1306 }
1307 if(isVerbose()){
1308 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1309 }
1310
1311 result = table_open(state->bundle, tag, comment, status);
1312
1313 if (result == NULL || U_FAILURE(*status))
1314 {
1315 return NULL;
1316 }
1317 return realParseTable(state, result, tag, startline, status);
1318 }
1319
1320 static struct SResource *
parseArray(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1321 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1322 {
1323 struct SResource *result = NULL;
1324 struct SResource *member = NULL;
1325 struct UString *tokenValue;
1326 struct UString memberComments;
1327 enum ETokenType token;
1328 UBool readToken = FALSE;
1329
1330 result = array_open(state->bundle, tag, comment, status);
1331
1332 if (result == NULL || U_FAILURE(*status))
1333 {
1334 return NULL;
1335 }
1336 if(isVerbose()){
1337 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1338 }
1339
1340 ustr_init(&memberComments);
1341
1342 /* '{' . resource [','] '}' */
1343 for (;;)
1344 {
1345 /* reset length */
1346 ustr_setlen(&memberComments, 0, status);
1347
1348 /* check for end of array, but don't consume next token unless it really is the end */
1349 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1350
1351
1352 if (token == TOK_CLOSE_BRACE)
1353 {
1354 getToken(state, NULL, NULL, NULL, status);
1355 if (!readToken) {
1356 warning(startline, "Encountered empty array");
1357 }
1358 break;
1359 }
1360
1361 if (token == TOK_EOF)
1362 {
1363 res_close(result);
1364 *status = U_INVALID_FORMAT_ERROR;
1365 error(startline, "unterminated array");
1366 return NULL;
1367 }
1368
1369 /* string arrays are a special case */
1370 if (token == TOK_STRING)
1371 {
1372 getToken(state, &tokenValue, &memberComments, NULL, status);
1373 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1374 }
1375 else
1376 {
1377 member = parseResource(state, NULL, &memberComments, status);
1378 }
1379
1380 if (member == NULL || U_FAILURE(*status))
1381 {
1382 res_close(result);
1383 return NULL;
1384 }
1385
1386 array_add(result, member, status);
1387
1388 if (U_FAILURE(*status))
1389 {
1390 res_close(result);
1391 return NULL;
1392 }
1393
1394 /* eat optional comma if present */
1395 token = peekToken(state, 0, NULL, NULL, NULL, status);
1396
1397 if (token == TOK_COMMA)
1398 {
1399 getToken(state, NULL, NULL, NULL, status);
1400 }
1401
1402 if (U_FAILURE(*status))
1403 {
1404 res_close(result);
1405 return NULL;
1406 }
1407 readToken = TRUE;
1408 }
1409
1410 ustr_deinit(&memberComments);
1411 return result;
1412 }
1413
1414 static struct SResource *
parseIntVector(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1415 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1416 {
1417 struct SResource *result = NULL;
1418 enum ETokenType token;
1419 char *string;
1420 int32_t value;
1421 UBool readToken = FALSE;
1422 char *stopstring;
1423 uint32_t len;
1424 struct UString memberComments;
1425
1426 result = intvector_open(state->bundle, tag, comment, status);
1427
1428 if (result == NULL || U_FAILURE(*status))
1429 {
1430 return NULL;
1431 }
1432
1433 if(isVerbose()){
1434 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1435 }
1436 ustr_init(&memberComments);
1437 /* '{' . string [','] '}' */
1438 for (;;)
1439 {
1440 ustr_setlen(&memberComments, 0, status);
1441
1442 /* check for end of array, but don't consume next token unless it really is the end */
1443 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1444
1445 if (token == TOK_CLOSE_BRACE)
1446 {
1447 /* it's the end, consume the close brace */
1448 getToken(state, NULL, NULL, NULL, status);
1449 if (!readToken) {
1450 warning(startline, "Encountered empty int vector");
1451 }
1452 ustr_deinit(&memberComments);
1453 return result;
1454 }
1455
1456 string = getInvariantString(state, NULL, NULL, status);
1457
1458 if (U_FAILURE(*status))
1459 {
1460 res_close(result);
1461 return NULL;
1462 }
1463
1464 /* For handling illegal char in the Intvector */
1465 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1466 len=(uint32_t)(stopstring-string);
1467
1468 if(len==uprv_strlen(string))
1469 {
1470 intvector_add(result, value, status);
1471 uprv_free(string);
1472 token = peekToken(state, 0, NULL, NULL, NULL, status);
1473 }
1474 else
1475 {
1476 uprv_free(string);
1477 *status=U_INVALID_CHAR_FOUND;
1478 }
1479
1480 if (U_FAILURE(*status))
1481 {
1482 res_close(result);
1483 return NULL;
1484 }
1485
1486 /* the comma is optional (even though it is required to prevent the reader from concatenating
1487 consecutive entries) so that a missing comma on the last entry isn't an error */
1488 if (token == TOK_COMMA)
1489 {
1490 getToken(state, NULL, NULL, NULL, status);
1491 }
1492 readToken = TRUE;
1493 }
1494
1495 /* not reached */
1496 /* A compiler warning will appear if all paths don't contain a return statement. */
1497 /* intvector_close(result, status);
1498 *status = U_INTERNAL_PROGRAM_ERROR;
1499 return NULL;*/
1500 }
1501
1502 static struct SResource *
parseBinary(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1503 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1504 {
1505 struct SResource *result = NULL;
1506 uint8_t *value;
1507 char *string;
1508 char toConv[3] = {'\0', '\0', '\0'};
1509 uint32_t count;
1510 uint32_t i;
1511 uint32_t line;
1512 char *stopstring;
1513 uint32_t len;
1514
1515 string = getInvariantString(state, &line, NULL, status);
1516
1517 if (string == NULL || U_FAILURE(*status))
1518 {
1519 return NULL;
1520 }
1521
1522 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1523
1524 if (U_FAILURE(*status))
1525 {
1526 uprv_free(string);
1527 return NULL;
1528 }
1529
1530 if(isVerbose()){
1531 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1532 }
1533
1534 count = (uint32_t)uprv_strlen(string);
1535 if (count > 0){
1536 if((count % 2)==0){
1537 value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1538
1539 if (value == NULL)
1540 {
1541 uprv_free(string);
1542 *status = U_MEMORY_ALLOCATION_ERROR;
1543 return NULL;
1544 }
1545
1546 for (i = 0; i < count; i += 2)
1547 {
1548 toConv[0] = string[i];
1549 toConv[1] = string[i + 1];
1550
1551 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1552 len=(uint32_t)(stopstring-toConv);
1553
1554 if(len!=uprv_strlen(toConv))
1555 {
1556 uprv_free(string);
1557 *status=U_INVALID_CHAR_FOUND;
1558 return NULL;
1559 }
1560 }
1561
1562 result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1563
1564 uprv_free(value);
1565 }
1566 else
1567 {
1568 *status = U_INVALID_CHAR_FOUND;
1569 uprv_free(string);
1570 error(line, "Encountered invalid binary string");
1571 return NULL;
1572 }
1573 }
1574 else
1575 {
1576 result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1577 warning(startline, "Encountered empty binary tag");
1578 }
1579 uprv_free(string);
1580
1581 return result;
1582 }
1583
1584 static struct SResource *
parseInteger(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1585 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1586 {
1587 struct SResource *result = NULL;
1588 int32_t value;
1589 char *string;
1590 char *stopstring;
1591 uint32_t len;
1592
1593 string = getInvariantString(state, NULL, NULL, status);
1594
1595 if (string == NULL || U_FAILURE(*status))
1596 {
1597 return NULL;
1598 }
1599
1600 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1601
1602 if (U_FAILURE(*status))
1603 {
1604 uprv_free(string);
1605 return NULL;
1606 }
1607
1608 if(isVerbose()){
1609 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1610 }
1611
1612 if (uprv_strlen(string) <= 0)
1613 {
1614 warning(startline, "Encountered empty integer. Default value is 0.");
1615 }
1616
1617 /* Allow integer support for hexdecimal, octal digit and decimal*/
1618 /* and handle illegal char in the integer*/
1619 value = uprv_strtoul(string, &stopstring, 0);
1620 len=(uint32_t)(stopstring-string);
1621 if(len==uprv_strlen(string))
1622 {
1623 result = int_open(state->bundle, tag, value, comment, status);
1624 }
1625 else
1626 {
1627 *status=U_INVALID_CHAR_FOUND;
1628 }
1629 uprv_free(string);
1630
1631 return result;
1632 }
1633
1634 static struct SResource *
parseImport(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1635 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1636 {
1637 struct SResource *result;
1638 FileStream *file;
1639 int32_t len;
1640 uint8_t *data;
1641 char *filename;
1642 uint32_t line;
1643 char *fullname = NULL;
1644 filename = getInvariantString(state, &line, NULL, status);
1645
1646 if (U_FAILURE(*status))
1647 {
1648 return NULL;
1649 }
1650
1651 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1652
1653 if (U_FAILURE(*status))
1654 {
1655 uprv_free(filename);
1656 return NULL;
1657 }
1658
1659 if(isVerbose()){
1660 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1661 }
1662
1663 /* Open the input file for reading */
1664 if (state->inputdir == NULL)
1665 {
1666 #if 1
1667 /*
1668 * Always save file file name, even if there's
1669 * no input directory specified. MIGHT BREAK SOMETHING
1670 */
1671 int32_t filenameLength = uprv_strlen(filename);
1672
1673 fullname = (char *) uprv_malloc(filenameLength + 1);
1674 uprv_strcpy(fullname, filename);
1675 #endif
1676
1677 file = T_FileStream_open(filename, "rb");
1678 }
1679 else
1680 {
1681
1682 int32_t count = (int32_t)uprv_strlen(filename);
1683
1684 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1685 {
1686 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1687
1688 /* test for NULL */
1689 if(fullname == NULL)
1690 {
1691 *status = U_MEMORY_ALLOCATION_ERROR;
1692 return NULL;
1693 }
1694
1695 uprv_strcpy(fullname, state->inputdir);
1696
1697 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1698 fullname[state->inputdirLength + 1] = '\0';
1699
1700 uprv_strcat(fullname, filename);
1701 }
1702 else
1703 {
1704 fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1705
1706 /* test for NULL */
1707 if(fullname == NULL)
1708 {
1709 *status = U_MEMORY_ALLOCATION_ERROR;
1710 return NULL;
1711 }
1712
1713 uprv_strcpy(fullname, state->inputdir);
1714 uprv_strcat(fullname, filename);
1715 }
1716
1717 file = T_FileStream_open(fullname, "rb");
1718
1719 }
1720
1721 if (file == NULL)
1722 {
1723 error(line, "couldn't open input file %s", filename);
1724 *status = U_FILE_ACCESS_ERROR;
1725 return NULL;
1726 }
1727
1728 len = T_FileStream_size(file);
1729 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1730 /* test for NULL */
1731 if(data == NULL)
1732 {
1733 *status = U_MEMORY_ALLOCATION_ERROR;
1734 T_FileStream_close (file);
1735 return NULL;
1736 }
1737
1738 /* int32_t numRead = */ T_FileStream_read (file, data, len);
1739 T_FileStream_close (file);
1740
1741 result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1742
1743 uprv_free(data);
1744 uprv_free(filename);
1745 uprv_free(fullname);
1746
1747 return result;
1748 }
1749
1750 static struct SResource *
parseInclude(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1751 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1752 {
1753 struct SResource *result;
1754 int32_t len=0;
1755 char *filename;
1756 uint32_t line;
1757 UChar *pTarget = NULL;
1758
1759 UCHARBUF *ucbuf;
1760 char *fullname = NULL;
1761 int32_t count = 0;
1762 const char* cp = NULL;
1763 const UChar* uBuffer = NULL;
1764
1765 filename = getInvariantString(state, &line, NULL, status);
1766 count = (int32_t)uprv_strlen(filename);
1767
1768 if (U_FAILURE(*status))
1769 {
1770 return NULL;
1771 }
1772
1773 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1774
1775 if (U_FAILURE(*status))
1776 {
1777 uprv_free(filename);
1778 return NULL;
1779 }
1780
1781 if(isVerbose()){
1782 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1783 }
1784
1785 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1786 /* test for NULL */
1787 if(fullname == NULL)
1788 {
1789 *status = U_MEMORY_ALLOCATION_ERROR;
1790 uprv_free(filename);
1791 return NULL;
1792 }
1793
1794 if(state->inputdir!=NULL){
1795 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1796 {
1797
1798 uprv_strcpy(fullname, state->inputdir);
1799
1800 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1801 fullname[state->inputdirLength + 1] = '\0';
1802
1803 uprv_strcat(fullname, filename);
1804 }
1805 else
1806 {
1807 uprv_strcpy(fullname, state->inputdir);
1808 uprv_strcat(fullname, filename);
1809 }
1810 }else{
1811 uprv_strcpy(fullname,filename);
1812 }
1813
1814 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1815
1816 if (U_FAILURE(*status)) {
1817 error(line, "couldn't open input file %s\n", filename);
1818 return NULL;
1819 }
1820
1821 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1822 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1823
1824 ucbuf_close(ucbuf);
1825
1826 uprv_free(pTarget);
1827
1828 uprv_free(filename);
1829 uprv_free(fullname);
1830
1831 return result;
1832 }
1833
1834
1835
1836
1837
/*
 * UChar string constants for the resource type keywords that may follow ':' in a
 * resource declaration. Each constant declared here has a matching U_STRING_INIT
 * in initParser(). The third macro argument is the keyword's length in characters.
 * "bin" and "int" are accepted by parseResourceType() as short forms of "binary"
 * and "integer".
 */
U_STRING_DECL(k_type_string, "string", 6);
U_STRING_DECL(k_type_binary, "binary", 6);
U_STRING_DECL(k_type_bin, "bin", 3);
U_STRING_DECL(k_type_table, "table", 5);
U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
U_STRING_DECL(k_type_int, "int", 3);
U_STRING_DECL(k_type_integer, "integer", 7);
U_STRING_DECL(k_type_array, "array", 5);
U_STRING_DECL(k_type_alias, "alias", 5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);

/* Various non-standard processing plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1856
/*
 * Resource types recognised by the parser.
 *
 * The enumerator values are used directly as indices into gResourceTypes[], so the
 * order here must stay in step with the rows of that table. RESTYPE_UNKNOWN (first)
 * means "no explicit type"; RESTYPE_RESERVED (last) is the end marker that
 * parseResourceType() uses to detect an unrecognised type name.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED
} EResourceType;
1876
/*
 * Per-type parser table, indexed by EResourceType (one row per enumerator, in the
 * same order).
 *   nameChars     - type name as a char string, used in error messages;
 *   nameUChars    - the same name as a UChar string, compared against the type token
 *                   by parseResourceType(); NULL for the first and last rows, which
 *                   that search never visits;
 *   parseFunction - parser invoked by parseResource() for this type; a NULL entry
 *                   makes parseResource() report an internal error.
 */
static struct {
    const char *nameChars; /* only used for debugging */
    const UChar *nameUChars;
    ParseResourceFunction *parseFunction;
} gResourceTypes[] = {
    {"Unknown", NULL, NULL},
    {"string", k_type_string, parseString},
    {"binary", k_type_binary, parseBinary},
    {"table", k_type_table, parseTable},
    {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
    {"integer", k_type_integer, parseInteger},
    {"array", k_type_array, parseArray},
    {"alias", k_type_alias, parseAlias},
    {"intvector", k_type_intvector, parseIntVector},
    {"import", k_type_import, parseImport},
    {"include", k_type_include, parseInclude},
    {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
    {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
    {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
    {"process(dependency)", k_type_plugin_dependency, parseDependency},
    {"reserved", NULL, NULL}
};
1899
initParser()1900 void initParser()
1901 {
1902 U_STRING_INIT(k_type_string, "string", 6);
1903 U_STRING_INIT(k_type_binary, "binary", 6);
1904 U_STRING_INIT(k_type_bin, "bin", 3);
1905 U_STRING_INIT(k_type_table, "table", 5);
1906 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
1907 U_STRING_INIT(k_type_int, "int", 3);
1908 U_STRING_INIT(k_type_integer, "integer", 7);
1909 U_STRING_INIT(k_type_array, "array", 5);
1910 U_STRING_INIT(k_type_alias, "alias", 5);
1911 U_STRING_INIT(k_type_intvector, "intvector", 9);
1912 U_STRING_INIT(k_type_import, "import", 6);
1913 U_STRING_INIT(k_type_include, "include", 7);
1914
1915 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1916 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
1917 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
1918 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
1919 }
1920
isTable(enum EResourceType type)1921 static inline UBool isTable(enum EResourceType type) {
1922 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1923 }
1924
1925 static enum EResourceType
parseResourceType(ParseState * state,UErrorCode * status)1926 parseResourceType(ParseState* state, UErrorCode *status)
1927 {
1928 struct UString *tokenValue;
1929 struct UString comment;
1930 enum EResourceType result = RESTYPE_UNKNOWN;
1931 uint32_t line=0;
1932 ustr_init(&comment);
1933 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1934
1935 if (U_FAILURE(*status))
1936 {
1937 return RESTYPE_UNKNOWN;
1938 }
1939
1940 *status = U_ZERO_ERROR;
1941
1942 /* Search for normal types */
1943 result=RESTYPE_UNKNOWN;
1944 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1945 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1946 break;
1947 }
1948 }
1949 /* Now search for the aliases */
1950 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1951 result = RESTYPE_INTEGER;
1952 }
1953 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1954 result = RESTYPE_BINARY;
1955 }
1956 else if (result == RESTYPE_RESERVED) {
1957 char tokenBuffer[1024];
1958 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1959 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1960 *status = U_INVALID_FORMAT_ERROR;
1961 error(line, "unknown resource type '%s'", tokenBuffer);
1962 }
1963
1964 return result;
1965 }
1966
1967 /* parse a non-top-level resource */
1968 static struct SResource *
parseResource(ParseState * state,char * tag,const struct UString * comment,UErrorCode * status)1969 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1970 {
1971 enum ETokenType token;
1972 enum EResourceType resType = RESTYPE_UNKNOWN;
1973 ParseResourceFunction *parseFunction = NULL;
1974 struct UString *tokenValue;
1975 uint32_t startline;
1976 uint32_t line;
1977
1978
1979 token = getToken(state, &tokenValue, NULL, &startline, status);
1980
1981 if(isVerbose()){
1982 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1983 }
1984
1985 /* name . [ ':' type ] '{' resource '}' */
1986 /* This function parses from the colon onwards. If the colon is present, parse the
1987 type then try to parse a resource of that type. If there is no explicit type,
1988 work it out using the lookahead tokens. */
1989 switch (token)
1990 {
1991 case TOK_EOF:
1992 *status = U_INVALID_FORMAT_ERROR;
1993 error(startline, "Unexpected EOF encountered");
1994 return NULL;
1995
1996 case TOK_ERROR:
1997 *status = U_INVALID_FORMAT_ERROR;
1998 return NULL;
1999
2000 case TOK_COLON:
2001 resType = parseResourceType(state, status);
2002 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
2003
2004 if (U_FAILURE(*status))
2005 {
2006 return NULL;
2007 }
2008
2009 break;
2010
2011 case TOK_OPEN_BRACE:
2012 break;
2013
2014 default:
2015 *status = U_INVALID_FORMAT_ERROR;
2016 error(startline, "syntax error while reading a resource, expected '{' or ':'");
2017 return NULL;
2018 }
2019
2020
2021 if (resType == RESTYPE_UNKNOWN)
2022 {
2023 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
2024 We could have any of the following:
2025 { { => array (nested)
2026 { :/} => array
2027 { string , => string array
2028
2029 { string { => table
2030
2031 { string :/{ => table
2032 { string } => string
2033 */
2034
2035 token = peekToken(state, 0, NULL, &line, NULL,status);
2036
2037 if (U_FAILURE(*status))
2038 {
2039 return NULL;
2040 }
2041
2042 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
2043 {
2044 resType = RESTYPE_ARRAY;
2045 }
2046 else if (token == TOK_STRING)
2047 {
2048 token = peekToken(state, 1, NULL, &line, NULL, status);
2049
2050 if (U_FAILURE(*status))
2051 {
2052 return NULL;
2053 }
2054
2055 switch (token)
2056 {
2057 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
2058 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
2059 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
2060 case TOK_COLON: resType = RESTYPE_TABLE; break;
2061 default:
2062 *status = U_INVALID_FORMAT_ERROR;
2063 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2064 return NULL;
2065 }
2066 }
2067 else
2068 {
2069 *status = U_INVALID_FORMAT_ERROR;
2070 error(line, "Unexpected token after '{'");
2071 return NULL;
2072 }
2073
2074 /* printf("Type guessed as %s\n", resourceNames[resType]); */
2075 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
2076 *status = U_INVALID_FORMAT_ERROR;
2077 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2078 return NULL;
2079 }
2080
2081
2082 /* We should now know what we need to parse next, so call the appropriate parser
2083 function and return. */
2084 parseFunction = gResourceTypes[resType].parseFunction;
2085 if (parseFunction != NULL) {
2086 return parseFunction(state, tag, startline, comment, status);
2087 }
2088 else {
2089 *status = U_INTERNAL_PROGRAM_ERROR;
2090 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2091 }
2092
2093 return NULL;
2094 }
2095
2096 /* parse the top-level resource */
2097 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,const char * filename,UBool makeBinaryCollation,UBool omitCollationRules,UErrorCode * status)2098 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
2099 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2100 {
2101 struct UString *tokenValue;
2102 struct UString comment;
2103 uint32_t line;
2104 enum EResourceType bundleType;
2105 enum ETokenType token;
2106 ParseState state;
2107 uint32_t i;
2108
2109
2110 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2111 {
2112 ustr_init(&state.lookahead[i].value);
2113 ustr_init(&state.lookahead[i].comment);
2114 }
2115
2116 initLookahead(&state, buf, status);
2117
2118 state.inputdir = inputDir;
2119 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2120 state.outputdir = outputDir;
2121 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2122 state.filename = filename;
2123 state.makeBinaryCollation = makeBinaryCollation;
2124 state.omitCollationRules = omitCollationRules;
2125
2126 ustr_init(&comment);
2127 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2128
2129 state.bundle = bundle_open(&comment, FALSE, status);
2130
2131 if (state.bundle == NULL || U_FAILURE(*status))
2132 {
2133 return NULL;
2134 }
2135
2136
2137 bundle_setlocale(state.bundle, tokenValue->fChars, status);
2138
2139 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2140 token = getToken(&state, NULL, NULL, &line, status);
2141 if(token==TOK_COLON) {
2142 *status=U_ZERO_ERROR;
2143 bundleType=parseResourceType(&state, status);
2144
2145 if(isTable(bundleType))
2146 {
2147 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2148 }
2149 else
2150 {
2151 *status=U_PARSE_ERROR;
2152 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2153 }
2154 }
2155 else
2156 {
2157 /* not a colon */
2158 if(token==TOK_OPEN_BRACE)
2159 {
2160 *status=U_ZERO_ERROR;
2161 bundleType=RESTYPE_TABLE;
2162 }
2163 else
2164 {
2165 /* neither colon nor open brace */
2166 *status=U_PARSE_ERROR;
2167 bundleType=RESTYPE_UNKNOWN;
2168 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2169 }
2170 }
2171
2172 if (U_FAILURE(*status))
2173 {
2174 bundle_close(state.bundle, status);
2175 return NULL;
2176 }
2177
2178 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2179 /*
2180 * Parse a top-level table with the table(nofallback) declaration.
2181 * This is the same as a regular table, but also sets the
2182 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2183 */
2184 state.bundle->noFallback=TRUE;
2185 }
2186 /* top-level tables need not handle special table names like "collations" */
2187 realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2188 if(dependencyArray!=NULL){
2189 table_add(state.bundle->fRoot, dependencyArray, 0, status);
2190 dependencyArray = NULL;
2191 }
2192 if (U_FAILURE(*status))
2193 {
2194 bundle_close(state.bundle, status);
2195 res_close(dependencyArray);
2196 return NULL;
2197 }
2198
2199 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2200 {
2201 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2202 if(isStrict()){
2203 *status = U_INVALID_FORMAT_ERROR;
2204 return NULL;
2205 }
2206 }
2207
2208 cleanupLookahead(&state);
2209 ustr_deinit(&comment);
2210 return state.bundle;
2211 }
2212