• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1998-2012, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 *   Date          Name          Description
14 *   05/26/99     stephen       Creation.
15 *   02/25/00     weiv          Overhaul to write udata
16 *   5/10/01      Ram           removed ustdio dependency
17 *   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20 
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "genrb.h"
33 #include "unicode/ustring.h"
34 #include "unicode/uscript.h"
35 #include "unicode/putil.h"
36 #include <stdio.h>
37 
38 /* Number of tokens to read ahead of the current stream position */
/* The lookahead ring in ParseState is declared with MAX_LOOKAHEAD + 1 slots. */
39 #define MAX_LOOKAHEAD   3
40 
/* Code points (given as hexadecimal values) that the rule-file reader compares
   characters against. */
41 #define CR               0x000D
42 #define LF               0x000A
43 #define SPACE            0x0020
44 #define TAB              0x0009
45 #define ESCAPE           0x005C
46 #define HASH             0x0023
47 #define QUOTE            0x0027
48 #define ZERO             0x0030
/* STARTCOMMAND/ENDCOMMAND share their values with OPENSQBRACKET/CLOSESQBRACKET. */
49 #define STARTCOMMAND     0x005B
50 #define ENDCOMMAND       0x005D
51 #define OPENSQBRACKET    0x005B
52 #define CLOSESQBRACKET   0x005D
53 
/* One slot of the parser's lookahead ring: a token as delivered by
   getNextToken(), together with the data that came with it. */
54 struct Lookahead
55 {
56      enum   ETokenType type;     /* token kind returned by getNextToken() */
57      struct UString    value;    /* token text; overwritten when the slot is refilled */
58      struct UString    comment;  /* comment text reported by getNextToken() for this token */
59      uint32_t          line;     /* line number reported by getNextToken() */
60 };
61 
/* Printable names for token types, indexed by enum ETokenType; used when
   building "expecting X, got Y" / "Unexpected token" diagnostics. */
62 /* keep in sync with token defines in read.h */
63 const char *tokenNames[TOK_TOKEN_COUNT] =
64 {
65      "string",             /* A string token, such as "MonthNames" */
66      "'{'",                 /* An opening brace character */
67      "'}'",                 /* A closing brace character */
68      "','",                 /* A comma */
69      "':'",                 /* A colon */
70 
71      "<end of file>",     /* End of the file has been reached successfully */
      /* NOTE(review): which token type this last entry corresponds to is not
         visible here - confirm against the enum in read.h. */
72      "<end of line>"
73 };
74 
75 /* Just to store "TRUE" */
76 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
77 
/* All state carried through one parse: the token lookahead ring, the input
   buffer it is fed from, the bundle being built and the directory settings. */
78 typedef struct {
79     struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];  /* circular buffer: current token plus lookahead */
80     uint32_t          lookaheadPosition;             /* index of the current token's slot */
81     UCHARBUF         *buffer;                        /* input that getNextToken() reads from */
82     struct SRBRoot *bundle;                          /* bundle passed to string_open/alias_open/array_open/bin_open */
83     const char     *inputdir;                        /* may be NULL; prefix for files named inside the resource file */
84     uint32_t        inputdirLength;                  /* used to index the last character of inputdir */
85     const char     *outputdir;                       /* may be NULL; prefix used when checking dependency files */
86     uint32_t        outputdirLength;                 /* used to index the last character of outputdir */
87     UBool           makeBinaryCollation;             /* when set, addCollation builds a %%CollationBin from "Sequence" */
88 } ParseState;
89 
/* When TRUE, parseUCARules() returns res_none() instead of reading the rules file. */
90 static UBool gOmitCollationRules  = FALSE;
91 
/* Common signature of the per-type parse functions in this file
   (parseString, parseAlias, parseUCARules, ...). */
92 typedef struct SResource *
93 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
94 
/* Forward declaration: parseResource is called by functions defined before it
   (for example addCollation). */
95 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
96 
97 /* The nature of the lookahead buffer:
98    There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
99    MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
100    When getToken is called, the current pointer is moved to the next slot and the
101    old slot is filled with the next token from the reader by calling getNextToken.
102    The token values are stored in the slot, which means that token values don't
103    survive a call to getToken, i.e.
104 
105    struct UString *value;
106 
107    getToken(state, &value, NULL, NULL, status);
108    getToken(state, NULL,   NULL, NULL, status);   bad - value now points at a slot that is about to be refilled
109 */
110 static void
initLookahead(ParseState * state,UCHARBUF * buf,UErrorCode * status)111 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
112 {
113     static uint32_t initTypeStrings = 0;
114     uint32_t i;
115 
116     if (!initTypeStrings)
117     {
118         initTypeStrings = 1;
119     }
120 
121     state->lookaheadPosition   = 0;
122     state->buffer              = buf;
123 
124     resetLineNumber();
125 
126     for (i = 0; i < MAX_LOOKAHEAD; i++)
127     {
128         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
129         if (U_FAILURE(*status))
130         {
131             return;
132         }
133     }
134 
135     *status = U_ZERO_ERROR;
136 }
137 
138 static void
cleanupLookahead(ParseState * state)139 cleanupLookahead(ParseState* state)
140 {
141     uint32_t i;
142     for (i = 0; i <= MAX_LOOKAHEAD; i++)
143     {
144         ustr_deinit(&state->lookahead[i].value);
145         ustr_deinit(&state->lookahead[i].comment);
146     }
147 
148 }
149 
150 static enum ETokenType
getToken(ParseState * state,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)151 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
152 {
153     enum ETokenType result;
154     uint32_t          i;
155 
156     result = state->lookahead[state->lookaheadPosition].type;
157 
158     if (tokenValue != NULL)
159     {
160         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
161     }
162 
163     if (linenumber != NULL)
164     {
165         *linenumber = state->lookahead[state->lookaheadPosition].line;
166     }
167 
168     if (comment != NULL)
169     {
170         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
171     }
172 
173     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
174     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
175     ustr_setlen(&state->lookahead[i].comment, 0, status);
176     ustr_setlen(&state->lookahead[i].value, 0, status);
177     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
178 
179     /* printf("getToken, returning %s\n", tokenNames[result]); */
180 
181     return result;
182 }
183 
184 static enum ETokenType
peekToken(ParseState * state,uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)185 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
186 {
187     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
188 
189     if (U_FAILURE(*status))
190     {
191         return TOK_ERROR;
192     }
193 
194     if (lookaheadCount >= MAX_LOOKAHEAD)
195     {
196         *status = U_INTERNAL_PROGRAM_ERROR;
197         return TOK_ERROR;
198     }
199 
200     if (tokenValue != NULL)
201     {
202         *tokenValue = &state->lookahead[i].value;
203     }
204 
205     if (linenumber != NULL)
206     {
207         *linenumber = state->lookahead[i].line;
208     }
209 
210     if(comment != NULL){
211         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
212     }
213 
214     return state->lookahead[i].type;
215 }
216 
217 static void
expect(ParseState * state,enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)218 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
219 {
220     uint32_t        line;
221 
222     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
223 
224     if (linenumber != NULL)
225     {
226         *linenumber = line;
227     }
228 
229     if (U_FAILURE(*status))
230     {
231         return;
232     }
233 
234     if (token != expectedToken)
235     {
236         *status = U_INVALID_FORMAT_ERROR;
237         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
238     }
239     else
240     {
241         *status = U_ZERO_ERROR;
242     }
243 }
244 
getInvariantString(ParseState * state,uint32_t * line,struct UString * comment,UErrorCode * status)245 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
246 {
247     struct UString *tokenValue;
248     char           *result;
249     uint32_t        count;
250 
251     expect(state, TOK_STRING, &tokenValue, comment, line, status);
252 
253     if (U_FAILURE(*status))
254     {
255         return NULL;
256     }
257 
258     count = u_strlen(tokenValue->fChars);
259     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
260         *status = U_INVALID_FORMAT_ERROR;
261         error(*line, "invariant characters required for table keys, binary data, etc.");
262         return NULL;
263     }
264 
265     result = static_cast<char *>(uprv_malloc(count+1));
266 
267     if (result == NULL)
268     {
269         *status = U_MEMORY_ALLOCATION_ERROR;
270         return NULL;
271     }
272 
273     u_UCharsToChars(tokenValue->fChars, result, count+1);
274     return result;
275 }
276 
277 static struct SResource *
parseUCARules(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)278 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
279 {
280     struct SResource *result = NULL;
281     struct UString   *tokenValue;
282     FileStream       *file          = NULL;
283     char              filename[256] = { '\0' };
284     char              cs[128]       = { '\0' };
285     uint32_t          line;
286     UBool quoted = FALSE;
287     UCHARBUF *ucbuf=NULL;
288     UChar32   c     = 0;
289     const char* cp  = NULL;
290     UChar *pTarget     = NULL;
291     UChar *target      = NULL;
292     UChar *targetLimit = NULL;
293     int32_t size = 0;
294 
295     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
296 
297     if(isVerbose()){
298         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
299     }
300 
301     if (U_FAILURE(*status))
302     {
303         return NULL;
304     }
305     /* make the filename including the directory */
306     if (state->inputdir != NULL)
307     {
308         uprv_strcat(filename, state->inputdir);
309 
310         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
311         {
312             uprv_strcat(filename, U_FILE_SEP_STRING);
313         }
314     }
315 
316     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
317 
318     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
319 
320     if (U_FAILURE(*status))
321     {
322         return NULL;
323     }
324     uprv_strcat(filename, cs);
325 
326     if(gOmitCollationRules) {
327         return res_none();
328     }
329 
330     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
331 
332     if (U_FAILURE(*status)) {
333         error(line, "An error occured while opening the input file %s\n", filename);
334         return NULL;
335     }
336 
337     /* We allocate more space than actually required
338     * since the actual size needed for storing UChars
339     * is not known in UTF-8 byte stream
340     */
341     size        = ucbuf_size(ucbuf) + 1;
342     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
343     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
344     target      = pTarget;
345     targetLimit = pTarget+size;
346 
347     /* read the rules into the buffer */
348     while (target < targetLimit)
349     {
350         c = ucbuf_getc(ucbuf, status);
351         if(c == QUOTE) {
352             quoted = (UBool)!quoted;
353         }
354         /* weiv (06/26/2002): adding the following:
355          * - preserving spaces in commands [...]
356          * - # comments until the end of line
357          */
358         if (c == STARTCOMMAND && !quoted)
359         {
360             /* preserve commands
361              * closing bracket will be handled by the
362              * append at the end of the loop
363              */
364             while(c != ENDCOMMAND) {
365                 U_APPEND_CHAR32_ONLY(c, target);
366                 c = ucbuf_getc(ucbuf, status);
367             }
368         }
369         else if (c == HASH && !quoted) {
370             /* skip comments */
371             while(c != CR && c != LF) {
372                 c = ucbuf_getc(ucbuf, status);
373             }
374             continue;
375         }
376         else if (c == ESCAPE)
377         {
378             c = unescape(ucbuf, status);
379 
380             if (c == (UChar32)U_ERR)
381             {
382                 uprv_free(pTarget);
383                 T_FileStream_close(file);
384                 return NULL;
385             }
386         }
387         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
388         {
389             /* ignore spaces carriage returns
390             * and line feed unless in the form \uXXXX
391             */
392             continue;
393         }
394 
395         /* Append UChar * after dissembling if c > 0xffff*/
396         if (c != (UChar32)U_EOF)
397         {
398             U_APPEND_CHAR32_ONLY(c, target);
399         }
400         else
401         {
402             break;
403         }
404     }
405 
406     /* terminate the string */
407     if(target < targetLimit){
408         *target = 0x0000;
409     }
410 
411     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
412 
413 
414     ucbuf_close(ucbuf);
415     uprv_free(pTarget);
416     T_FileStream_close(file);
417 
418     return result;
419 }
420 
421 static struct SResource *
parseTransliterator(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)422 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
423 {
424     struct SResource *result = NULL;
425     struct UString   *tokenValue;
426     FileStream       *file          = NULL;
427     char              filename[256] = { '\0' };
428     char              cs[128]       = { '\0' };
429     uint32_t          line;
430     UCHARBUF *ucbuf=NULL;
431     const char* cp  = NULL;
432     UChar *pTarget     = NULL;
433     const UChar *pSource     = NULL;
434     int32_t size = 0;
435 
436     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
437 
438     if(isVerbose()){
439         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
440     }
441 
442     if (U_FAILURE(*status))
443     {
444         return NULL;
445     }
446     /* make the filename including the directory */
447     if (state->inputdir != NULL)
448     {
449         uprv_strcat(filename, state->inputdir);
450 
451         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
452         {
453             uprv_strcat(filename, U_FILE_SEP_STRING);
454         }
455     }
456 
457     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
458 
459     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
460 
461     if (U_FAILURE(*status))
462     {
463         return NULL;
464     }
465     uprv_strcat(filename, cs);
466 
467 
468     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
469 
470     if (U_FAILURE(*status)) {
471         error(line, "An error occured while opening the input file %s\n", filename);
472         return NULL;
473     }
474 
475     /* We allocate more space than actually required
476     * since the actual size needed for storing UChars
477     * is not known in UTF-8 byte stream
478     */
479     pSource = ucbuf_getBuffer(ucbuf, &size, status);
480     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
481     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
482 
483 #if !UCONFIG_NO_TRANSLITERATION
484     size = utrans_stripRules(pSource, size, pTarget, status);
485 #else
486     size = 0;
487     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
488 #endif
489     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
490 
491     ucbuf_close(ucbuf);
492     uprv_free(pTarget);
493     T_FileStream_close(file);
494 
495     return result;
496 }
497 static struct SResource* dependencyArray = NULL;
498 
499 static struct SResource *
parseDependency(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)500 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
501 {
502     struct SResource *result = NULL;
503     struct SResource *elem = NULL;
504     struct UString   *tokenValue;
505     uint32_t          line;
506     char              filename[256] = { '\0' };
507     char              cs[128]       = { '\0' };
508 
509     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
510 
511     if(isVerbose()){
512         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
513     }
514 
515     if (U_FAILURE(*status))
516     {
517         return NULL;
518     }
519     /* make the filename including the directory */
520     if (state->outputdir != NULL)
521     {
522         uprv_strcat(filename, state->outputdir);
523 
524         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
525         {
526             uprv_strcat(filename, U_FILE_SEP_STRING);
527         }
528     }
529 
530     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
531 
532     if (U_FAILURE(*status))
533     {
534         return NULL;
535     }
536     uprv_strcat(filename, cs);
537     if(!T_FileStream_file_exists(filename)){
538         if(isStrict()){
539             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
540         }else{
541             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
542         }
543     }
544     if(dependencyArray==NULL){
545         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
546     }
547     if(tag!=NULL){
548         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
549     }
550     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
551 
552     array_add(dependencyArray, elem, status);
553 
554     if (U_FAILURE(*status))
555     {
556         return NULL;
557     }
558     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
559     return result;
560 }
561 static struct SResource *
parseString(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)562 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
563 {
564     struct UString   *tokenValue;
565     struct SResource *result = NULL;
566 
567 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
568     {
569         return parseUCARules(tag, startline, status);
570     }*/
571     if(isVerbose()){
572         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
573     }
574     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
575 
576     if (U_SUCCESS(*status))
577     {
578         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
579         doesn't survive expect either) */
580 
581         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
582         if(U_SUCCESS(*status) && result) {
583             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
584 
585             if (U_FAILURE(*status))
586             {
587                 res_close(result);
588                 return NULL;
589             }
590         }
591     }
592 
593     return result;
594 }
595 
596 static struct SResource *
parseAlias(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)597 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
598 {
599     struct UString   *tokenValue;
600     struct SResource *result  = NULL;
601 
602     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
603 
604     if(isVerbose()){
605         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
606     }
607 
608     if (U_SUCCESS(*status))
609     {
610         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611         doesn't survive expect either) */
612 
613         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
614 
615         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
616 
617         if (U_FAILURE(*status))
618         {
619             res_close(result);
620             return NULL;
621         }
622     }
623 
624     return result;
625 }
626 
/* Context handed to importFromDataFile() through its void* parameter;
   addCollation() fills it from the parse state's inputdir/outputdir. */
627 typedef struct{
628     const char* inputDir;   /* directory searched for the imported locale file; may be NULL */
629     const char* outputDir;  /* passed through to parse() for the imported file */
630 } GenrbData;
631 
resLookup(struct SResource * res,const char * key)632 static struct SResource* resLookup(struct SResource* res, const char* key){
633     struct SResource *current = NULL;
634     struct SResTable *list;
635     if (res == res_none()) {
636         return NULL;
637     }
638 
639     list = &(res->u.fTable);
640 
641     current = list->fFirst;
642     while (current != NULL) {
643         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
644             return current;
645         }
646         current = current->fNext;
647     }
648     return NULL;
649 }
650 
importFromDataFile(void * context,const char * locale,const char * type,int32_t * pLength,UErrorCode * status)651 static const UChar* importFromDataFile(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status){
652     struct SRBRoot *data         = NULL;
653     UCHARBUF       *ucbuf        = NULL;
654     GenrbData* genrbdata = (GenrbData*) context;
655     int localeLength = strlen(locale);
656     char* filename = (char*)uprv_malloc(localeLength+5);
657     char           *inputDirBuf  = NULL;
658     char           *openFileName = NULL;
659     const char* cp = "";
660     UChar* urules = NULL;
661     int32_t urulesLength = 0;
662     int32_t i = 0;
663     int32_t dirlen  = 0;
664     int32_t filelen = 0;
665     struct SResource* root;
666     struct SResource* collations;
667     struct SResource* collation;
668     struct SResource* sequence;
669 
670     memcpy(filename, locale, localeLength);
671     for(i = 0; i < localeLength; i++){
672         if(filename[i] == '-'){
673             filename[i] = '_';
674         }
675     }
676     filename[localeLength]   = '.';
677     filename[localeLength+1] = 't';
678     filename[localeLength+2] = 'x';
679     filename[localeLength+3] = 't';
680     filename[localeLength+4] = 0;
681 
682 
683     if (status==NULL || U_FAILURE(*status)) {
684         return NULL;
685     }
686     if(filename==NULL){
687         *status=U_ILLEGAL_ARGUMENT_ERROR;
688         return NULL;
689     }else{
690         filelen = (int32_t)uprv_strlen(filename);
691     }
692     if(genrbdata->inputDir == NULL) {
693         const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
694         openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
695         openFileName[0] = '\0';
696         if (filenameBegin != NULL) {
697             /*
698              * When a filename ../../../data/root.txt is specified,
699              * we presume that the input directory is ../../../data
700              * This is very important when the resource file includes
701              * another file, like UCARules.txt or thaidict.brk.
702              */
703             int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
704             inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
705 
706             /* test for NULL */
707             if(inputDirBuf == NULL) {
708                 *status = U_MEMORY_ALLOCATION_ERROR;
709                 goto finish;
710             }
711 
712             inputDirBuf[filenameSize - 1] = 0;
713             genrbdata->inputDir = inputDirBuf;
714             dirlen  = (int32_t)uprv_strlen(genrbdata->inputDir);
715         }
716     }else{
717         dirlen  = (int32_t)uprv_strlen(genrbdata->inputDir);
718 
719         if(genrbdata->inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
720             openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
721 
722             /* test for NULL */
723             if(openFileName == NULL) {
724                 *status = U_MEMORY_ALLOCATION_ERROR;
725                 goto finish;
726             }
727 
728             openFileName[0] = '\0';
729             /*
730              * append the input dir to openFileName if the first char in
731              * filename is not file seperation char and the last char input directory is  not '.'.
732              * This is to support :
733              * genrb -s. /home/icu/data
734              * genrb -s. icu/data
735              * The user cannot mix notations like
736              * genrb -s. /icu/data --- the absolute path specified. -s redundant
737              * user should use
738              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
739              */
740             if( (filename[0] != U_FILE_SEP_CHAR) && (genrbdata->inputDir[dirlen-1] !='.')){
741                 uprv_strcpy(openFileName, genrbdata->inputDir);
742                 openFileName[dirlen]     = U_FILE_SEP_CHAR;
743             }
744             openFileName[dirlen + 1] = '\0';
745         } else {
746             openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
747 
748             /* test for NULL */
749             if(openFileName == NULL) {
750                 *status = U_MEMORY_ALLOCATION_ERROR;
751                 goto finish;
752             }
753 
754             uprv_strcpy(openFileName, genrbdata->inputDir);
755 
756         }
757     }
758     uprv_strcat(openFileName, filename);
759     /* printf("%s\n", openFileName);  */
760     *status = U_ZERO_ERROR;
761     ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
762 
763     if(*status == U_FILE_ACCESS_ERROR) {
764 
765         fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
766         goto finish;
767     }
768     if (ucbuf == NULL || U_FAILURE(*status)) {
769         fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(*status));
770         goto finish;
771     }
772 
773     /* Parse the data into an SRBRoot */
774     data = parse(ucbuf, genrbdata->inputDir, genrbdata->outputDir, FALSE, status);
775 
776     root = data->fRoot;
777     collations = resLookup(root, "collations");
778     if (collations != NULL) {
779       collation = resLookup(collations, type);
780       if (collation != NULL) {
781         sequence = resLookup(collation, "Sequence");
782         if (sequence != NULL) {
783           urules = sequence->u.fString.fChars;
784           urulesLength = sequence->u.fString.fLength;
785           *pLength = urulesLength;
786         }
787       }
788     }
789 
790 finish:
791     if (inputDirBuf != NULL) {
792         uprv_free(inputDirBuf);
793     }
794 
795     if (openFileName != NULL) {
796         uprv_free(openFileName);
797     }
798 
799     if(ucbuf) {
800         ucbuf_close(ucbuf);
801     }
802 
803     return urules;
804 }
805 
806 // Quick-and-dirty escaping function.
807 // Assumes that we are on an ASCII-based platform.
808 static void
escape(const UChar * s,char * buffer)809 escape(const UChar *s, char *buffer) {
810     int32_t length = u_strlen(s);
811     int32_t i = 0;
812     for (;;) {
813         UChar32 c;
814         U16_NEXT(s, i, length, c);
815         if (c == 0) {
816             *buffer = 0;
817             return;
818         } else if (0x20 <= c && c <= 0x7e) {
819             // printable ASCII
820             *buffer++ = (char)c;  // assumes ASCII-based platform
821         } else {
822             buffer += sprintf(buffer, "\\u%04X", (int)c);
823         }
824     }
825 }
826 
827 static struct SResource *
addCollation(ParseState * state,struct SResource * result,uint32_t startline,UErrorCode * status)828 addCollation(ParseState* state, struct SResource  *result, uint32_t startline, UErrorCode *status)
829 {
830     struct SResource  *member = NULL;
831     struct UString    *tokenValue;
832     struct UString     comment;
833     enum   ETokenType  token;
834     char               subtag[1024];
835     UVersionInfo       version;
836     uint32_t           line;
837     GenrbData genrbdata;
838     /* '{' . (name resource)* '}' */
839     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
840 
841     for (;;)
842     {
843         ustr_init(&comment);
844         token = getToken(state, &tokenValue, &comment, &line, status);
845 
846         if (token == TOK_CLOSE_BRACE)
847         {
848             return result;
849         }
850 
851         if (token != TOK_STRING)
852         {
853             res_close(result);
854             *status = U_INVALID_FORMAT_ERROR;
855 
856             if (token == TOK_EOF)
857             {
858                 error(startline, "unterminated table");
859             }
860             else
861             {
862                 error(line, "Unexpected token %s", tokenNames[token]);
863             }
864 
865             return NULL;
866         }
867 
868         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
869 
870         if (U_FAILURE(*status))
871         {
872             res_close(result);
873             return NULL;
874         }
875 
876         member = parseResource(state, subtag, NULL, status);
877 
878         if (U_FAILURE(*status))
879         {
880             res_close(result);
881             return NULL;
882         }
883 
884         if (uprv_strcmp(subtag, "Version") == 0)
885         {
886             char     ver[40];
887             int32_t length = member->u.fString.fLength;
888 
889             if (length >= (int32_t) sizeof(ver))
890             {
891                 length = (int32_t) sizeof(ver) - 1;
892             }
893 
894             u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
895             u_versionFromString(version, ver);
896 
897             table_add(result, member, line, status);
898 
899         }
900         else if (uprv_strcmp(subtag, "Override") == 0)
901         {
902             // UBool override = (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0);
903             table_add(result, member, line, status);
904 
905         }
906         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
907         {
908             /* discard duplicate %%CollationBin if any*/
909         }
910         else if (uprv_strcmp(subtag, "Sequence") == 0)
911         {
912 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
913             warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
914 #else
915             if(state->makeBinaryCollation) {
916 
917                 /* do the collation elements */
918                 int32_t     len   = 0;
919                 uint8_t   *data  = NULL;
920                 UCollator *coll  = NULL;
921                 int32_t reorderCodes[USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)];
922                 int32_t reorderCodeCount;
923                 int32_t reorderCodeIndex;
924                 UParseError parseError;
925 
926                 genrbdata.inputDir = state->inputdir;
927                 genrbdata.outputDir = state->outputdir;
928 
929                 UErrorCode intStatus = U_ZERO_ERROR;
930                 uprv_memset(&parseError, 0, sizeof(parseError));
931                 coll = ucol_openRulesForImport(member->u.fString.fChars, member->u.fString.fLength,
932                                                UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, importFromDataFile, &genrbdata, &intStatus);
933 
934                 if (U_SUCCESS(intStatus) && coll != NULL)
935                 {
936                     len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
937                     data = (uint8_t *)uprv_malloc(len);
938                     intStatus = U_ZERO_ERROR;
939                     len = ucol_cloneBinary(coll, data, len, &intStatus);
940                     /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
941 
942                     /* tailoring rules version */
943                     /* This is wrong! */
944                     /*coll->dataInfo.dataVersion[1] = version[0];*/
945                     /* Copy tailoring version. Builder version already */
946                     /* set in ucol_openRules */
947                     ((UCATableHeader *)data)->version[1] = version[0];
948                     ((UCATableHeader *)data)->version[2] = version[1];
949                     ((UCATableHeader *)data)->version[3] = version[2];
950 
951                     if (U_SUCCESS(intStatus) && data != NULL)
952                     {
953                         struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", len, data, NULL, NULL, status);
954                         table_add(result, collationBin, line, status);
955                         uprv_free(data);
956 
957                         reorderCodeCount = ucol_getReorderCodes(
958                             coll, reorderCodes, USCRIPT_CODE_LIMIT + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST), &intStatus);
959                         if (U_SUCCESS(intStatus) && reorderCodeCount > 0) {
960                             struct SResource *reorderCodeRes = intvector_open(state->bundle, "%%ReorderCodes", NULL, status);
961                             for (reorderCodeIndex = 0; reorderCodeIndex < reorderCodeCount; reorderCodeIndex++) {
962                                 intvector_add(reorderCodeRes, reorderCodes[reorderCodeIndex], status);
963                             }
964                             table_add(result, reorderCodeRes, line, status);
965                         }
966                     }
967                     else
968                     {
969                         warning(line, "could not obtain rules from collator");
970                         if(isStrict()){
971                             *status = U_INVALID_FORMAT_ERROR;
972                             return NULL;
973                         }
974                     }
975 
976                     ucol_close(coll);
977                 }
978                 else
979                 {
980                     if(intStatus == U_FILE_ACCESS_ERROR) {
981                         error(startline, "Collation could not be built- U_FILE_ACCESS_ERROR. Make sure ICU's data has been built and is loading properly.");
982                         *status = intStatus;
983                         return NULL;
984                     }
985                     char preBuffer[100], postBuffer[100];
986                     escape(parseError.preContext, preBuffer);
987                     escape(parseError.postContext, postBuffer);
988                     warning(line,
989                             "%%%%CollationBin could not be constructed from CollationElements\n"
990                             "  check context, check that the FractionalUCA.txt UCA version "
991                             "matches the current UCD version\n"
992                             "  UErrorCode=%s  UParseError={ line=%d offset=%d pre=<> post=<> }",
993                             u_errorName(intStatus),
994                             parseError.line,
995                             parseError.offset,
996                             preBuffer,
997                             postBuffer);
998                     if(isStrict()){
999                         *status = intStatus;
1000                         return NULL;
1001                     }
1002                 }
1003             } else {
1004                 if(isVerbose()) {
1005                     printf("Not building Collation binary\n");
1006                 }
1007             }
1008 #endif
1009             /* in order to achieve smaller data files, we can direct genrb */
1010             /* to omit collation rules */
1011             if(gOmitCollationRules) {
1012                 bundle_closeString(state->bundle, member);
1013             } else {
1014                 table_add(result, member, line, status);
1015             }
1016         }
1017         if (U_FAILURE(*status))
1018         {
1019             res_close(result);
1020             return NULL;
1021         }
1022     }
1023 
1024     // Reached the end without a TOK_CLOSE_BRACE.  Should be an error.
1025     *status = U_INTERNAL_PROGRAM_ERROR;
1026     return NULL;
1027 }
1028 
1029 static struct SResource *
parseCollationElements(ParseState * state,char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)1030 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1031 {
1032     struct SResource  *result = NULL;
1033     struct SResource  *member = NULL;
1034     struct SResource  *collationRes = NULL;
1035     struct UString    *tokenValue;
1036     struct UString     comment;
1037     enum   ETokenType  token;
1038     char               subtag[1024], typeKeyword[1024];
1039     uint32_t           line;
1040 
1041     result = table_open(state->bundle, tag, NULL, status);
1042 
1043     if (result == NULL || U_FAILURE(*status))
1044     {
1045         return NULL;
1046     }
1047     if(isVerbose()){
1048         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1049     }
1050     if(!newCollation) {
1051         return addCollation(state, result, startline, status);
1052     }
1053     else {
1054         for(;;) {
1055             ustr_init(&comment);
1056             token = getToken(state, &tokenValue, &comment, &line, status);
1057 
1058             if (token == TOK_CLOSE_BRACE)
1059             {
1060                 return result;
1061             }
1062 
1063             if (token != TOK_STRING)
1064             {
1065                 res_close(result);
1066                 *status = U_INVALID_FORMAT_ERROR;
1067 
1068                 if (token == TOK_EOF)
1069                 {
1070                     error(startline, "unterminated table");
1071                 }
1072                 else
1073                 {
1074                     error(line, "Unexpected token %s", tokenNames[token]);
1075                 }
1076 
1077                 return NULL;
1078             }
1079 
1080             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1081 
1082             if (U_FAILURE(*status))
1083             {
1084                 res_close(result);
1085                 return NULL;
1086             }
1087 
1088             if (uprv_strcmp(subtag, "default") == 0)
1089             {
1090                 member = parseResource(state, subtag, NULL, status);
1091 
1092                 if (U_FAILURE(*status))
1093                 {
1094                     res_close(result);
1095                     return NULL;
1096                 }
1097 
1098                 table_add(result, member, line, status);
1099             }
1100             else
1101             {
1102                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1103                 /* this probably needs to be refactored or recursively use the parser */
1104                 /* first we assume that our collation table won't have the explicit type */
1105                 /* then, we cannot handle aliases */
1106                 if(token == TOK_OPEN_BRACE) {
1107                     token = getToken(state, &tokenValue, &comment, &line, status);
1108                     collationRes = table_open(state->bundle, subtag, NULL, status);
1109                     collationRes = addCollation(state, collationRes, startline, status); /* need to parse the collation data regardless */
1110                     if (gIncludeUnihanColl || uprv_strcmp(subtag, "unihan") != 0) {
1111                         table_add(result, collationRes, startline, status);
1112                     }
1113                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1114                     /* we could have a table too */
1115                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1116                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1117                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
1118                         member = parseResource(state, subtag, NULL, status);
1119                         if (U_FAILURE(*status))
1120                         {
1121                             res_close(result);
1122                             return NULL;
1123                         }
1124 
1125                         table_add(result, member, line, status);
1126                     } else {
1127                         res_close(result);
1128                         *status = U_INVALID_FORMAT_ERROR;
1129                         return NULL;
1130                     }
1131                 } else {
1132                     res_close(result);
1133                     *status = U_INVALID_FORMAT_ERROR;
1134                     return NULL;
1135                 }
1136             }
1137 
1138             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1139 
1140             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1141 
1142             if (U_FAILURE(*status))
1143             {
1144                 res_close(result);
1145                 return NULL;
1146             }
1147         }
1148     }
1149 }
1150 
1151 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1152    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1153 static struct SResource *
realParseTable(ParseState * state,struct SResource * table,char * tag,uint32_t startline,UErrorCode * status)1154 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1155 {
1156     struct SResource  *member = NULL;
1157     struct UString    *tokenValue=NULL;
1158     struct UString    comment;
1159     enum   ETokenType token;
1160     char              subtag[1024];
1161     uint32_t          line;
1162     UBool             readToken = FALSE;
1163 
1164     /* '{' . (name resource)* '}' */
1165 
1166     if(isVerbose()){
1167         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1168     }
1169     for (;;)
1170     {
1171         ustr_init(&comment);
1172         token = getToken(state, &tokenValue, &comment, &line, status);
1173 
1174         if (token == TOK_CLOSE_BRACE)
1175         {
1176             if (!readToken) {
1177                 warning(startline, "Encountered empty table");
1178             }
1179             return table;
1180         }
1181 
1182         if (token != TOK_STRING)
1183         {
1184             *status = U_INVALID_FORMAT_ERROR;
1185 
1186             if (token == TOK_EOF)
1187             {
1188                 error(startline, "unterminated table");
1189             }
1190             else
1191             {
1192                 error(line, "unexpected token %s", tokenNames[token]);
1193             }
1194 
1195             return NULL;
1196         }
1197 
1198         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1199             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1200         } else {
1201             *status = U_INVALID_FORMAT_ERROR;
1202             error(line, "invariant characters required for table keys");
1203             return NULL;
1204         }
1205 
1206         if (U_FAILURE(*status))
1207         {
1208             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1209             return NULL;
1210         }
1211 
1212         member = parseResource(state, subtag, &comment, status);
1213 
1214         if (member == NULL || U_FAILURE(*status))
1215         {
1216             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1217             return NULL;
1218         }
1219 
1220         table_add(table, member, line, status);
1221 
1222         if (U_FAILURE(*status))
1223         {
1224             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1225             return NULL;
1226         }
1227         readToken = TRUE;
1228         ustr_deinit(&comment);
1229    }
1230 
1231     /* not reached */
1232     /* A compiler warning will appear if all paths don't contain a return statement. */
1233 /*     *status = U_INTERNAL_PROGRAM_ERROR;
1234      return NULL;*/
1235 }
1236 
1237 static struct SResource *
parseTable(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1238 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1239 {
1240     struct SResource *result;
1241 
1242     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1243     {
1244         return parseCollationElements(state, tag, startline, FALSE, status);
1245     }
1246     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1247     {
1248         return parseCollationElements(state, tag, startline, TRUE, status);
1249     }
1250     if(isVerbose()){
1251         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1252     }
1253 
1254     result = table_open(state->bundle, tag, comment, status);
1255 
1256     if (result == NULL || U_FAILURE(*status))
1257     {
1258         return NULL;
1259     }
1260     return realParseTable(state, result, tag, startline,  status);
1261 }
1262 
1263 static struct SResource *
parseArray(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1264 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1265 {
1266     struct SResource  *result = NULL;
1267     struct SResource  *member = NULL;
1268     struct UString    *tokenValue;
1269     struct UString    memberComments;
1270     enum   ETokenType token;
1271     UBool             readToken = FALSE;
1272 
1273     result = array_open(state->bundle, tag, comment, status);
1274 
1275     if (result == NULL || U_FAILURE(*status))
1276     {
1277         return NULL;
1278     }
1279     if(isVerbose()){
1280         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1281     }
1282 
1283     ustr_init(&memberComments);
1284 
1285     /* '{' . resource [','] '}' */
1286     for (;;)
1287     {
1288         /* reset length */
1289         ustr_setlen(&memberComments, 0, status);
1290 
1291         /* check for end of array, but don't consume next token unless it really is the end */
1292         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1293 
1294 
1295         if (token == TOK_CLOSE_BRACE)
1296         {
1297             getToken(state, NULL, NULL, NULL, status);
1298             if (!readToken) {
1299                 warning(startline, "Encountered empty array");
1300             }
1301             break;
1302         }
1303 
1304         if (token == TOK_EOF)
1305         {
1306             res_close(result);
1307             *status = U_INVALID_FORMAT_ERROR;
1308             error(startline, "unterminated array");
1309             return NULL;
1310         }
1311 
1312         /* string arrays are a special case */
1313         if (token == TOK_STRING)
1314         {
1315             getToken(state, &tokenValue, &memberComments, NULL, status);
1316             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1317         }
1318         else
1319         {
1320             member = parseResource(state, NULL, &memberComments, status);
1321         }
1322 
1323         if (member == NULL || U_FAILURE(*status))
1324         {
1325             res_close(result);
1326             return NULL;
1327         }
1328 
1329         array_add(result, member, status);
1330 
1331         if (U_FAILURE(*status))
1332         {
1333             res_close(result);
1334             return NULL;
1335         }
1336 
1337         /* eat optional comma if present */
1338         token = peekToken(state, 0, NULL, NULL, NULL, status);
1339 
1340         if (token == TOK_COMMA)
1341         {
1342             getToken(state, NULL, NULL, NULL, status);
1343         }
1344 
1345         if (U_FAILURE(*status))
1346         {
1347             res_close(result);
1348             return NULL;
1349         }
1350         readToken = TRUE;
1351     }
1352 
1353     ustr_deinit(&memberComments);
1354     return result;
1355 }
1356 
1357 static struct SResource *
parseIntVector(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1358 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1359 {
1360     struct SResource  *result = NULL;
1361     enum   ETokenType  token;
1362     char              *string;
1363     int32_t            value;
1364     UBool              readToken = FALSE;
1365     char              *stopstring;
1366     uint32_t           len;
1367     struct UString     memberComments;
1368 
1369     result = intvector_open(state->bundle, tag, comment, status);
1370 
1371     if (result == NULL || U_FAILURE(*status))
1372     {
1373         return NULL;
1374     }
1375 
1376     if(isVerbose()){
1377         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1378     }
1379     ustr_init(&memberComments);
1380     /* '{' . string [','] '}' */
1381     for (;;)
1382     {
1383         ustr_setlen(&memberComments, 0, status);
1384 
1385         /* check for end of array, but don't consume next token unless it really is the end */
1386         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1387 
1388         if (token == TOK_CLOSE_BRACE)
1389         {
1390             /* it's the end, consume the close brace */
1391             getToken(state, NULL, NULL, NULL, status);
1392             if (!readToken) {
1393                 warning(startline, "Encountered empty int vector");
1394             }
1395             ustr_deinit(&memberComments);
1396             return result;
1397         }
1398 
1399         string = getInvariantString(state, NULL, NULL, status);
1400 
1401         if (U_FAILURE(*status))
1402         {
1403             res_close(result);
1404             return NULL;
1405         }
1406 
1407         /* For handling illegal char in the Intvector */
1408         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1409         len=(uint32_t)(stopstring-string);
1410 
1411         if(len==uprv_strlen(string))
1412         {
1413             intvector_add(result, value, status);
1414             uprv_free(string);
1415             token = peekToken(state, 0, NULL, NULL, NULL, status);
1416         }
1417         else
1418         {
1419             uprv_free(string);
1420             *status=U_INVALID_CHAR_FOUND;
1421         }
1422 
1423         if (U_FAILURE(*status))
1424         {
1425             res_close(result);
1426             return NULL;
1427         }
1428 
1429         /* the comma is optional (even though it is required to prevent the reader from concatenating
1430         consecutive entries) so that a missing comma on the last entry isn't an error */
1431         if (token == TOK_COMMA)
1432         {
1433             getToken(state, NULL, NULL, NULL, status);
1434         }
1435         readToken = TRUE;
1436     }
1437 
1438     /* not reached */
1439     /* A compiler warning will appear if all paths don't contain a return statement. */
1440 /*    intvector_close(result, status);
1441     *status = U_INTERNAL_PROGRAM_ERROR;
1442     return NULL;*/
1443 }
1444 
1445 static struct SResource *
parseBinary(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1446 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1447 {
1448     struct SResource *result = NULL;
1449     uint8_t          *value;
1450     char             *string;
1451     char              toConv[3] = {'\0', '\0', '\0'};
1452     uint32_t          count;
1453     uint32_t          i;
1454     uint32_t          line;
1455     char             *stopstring;
1456     uint32_t          len;
1457 
1458     string = getInvariantString(state, &line, NULL, status);
1459 
1460     if (string == NULL || U_FAILURE(*status))
1461     {
1462         return NULL;
1463     }
1464 
1465     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1466 
1467     if (U_FAILURE(*status))
1468     {
1469         uprv_free(string);
1470         return NULL;
1471     }
1472 
1473     if(isVerbose()){
1474         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1475     }
1476 
1477     count = (uint32_t)uprv_strlen(string);
1478     if (count > 0){
1479         if((count % 2)==0){
1480             value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1481 
1482             if (value == NULL)
1483             {
1484                 uprv_free(string);
1485                 *status = U_MEMORY_ALLOCATION_ERROR;
1486                 return NULL;
1487             }
1488 
1489             for (i = 0; i < count; i += 2)
1490             {
1491                 toConv[0] = string[i];
1492                 toConv[1] = string[i + 1];
1493 
1494                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1495                 len=(uint32_t)(stopstring-toConv);
1496 
1497                 if(len!=uprv_strlen(toConv))
1498                 {
1499                     uprv_free(string);
1500                     *status=U_INVALID_CHAR_FOUND;
1501                     return NULL;
1502                 }
1503             }
1504 
1505             result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1506 
1507             uprv_free(value);
1508         }
1509         else
1510         {
1511             *status = U_INVALID_CHAR_FOUND;
1512             uprv_free(string);
1513             error(line, "Encountered invalid binary string");
1514             return NULL;
1515         }
1516     }
1517     else
1518     {
1519         result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1520         warning(startline, "Encountered empty binary tag");
1521     }
1522     uprv_free(string);
1523 
1524     return result;
1525 }
1526 
1527 static struct SResource *
parseInteger(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1528 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1529 {
1530     struct SResource *result = NULL;
1531     int32_t           value;
1532     char             *string;
1533     char             *stopstring;
1534     uint32_t          len;
1535 
1536     string = getInvariantString(state, NULL, NULL, status);
1537 
1538     if (string == NULL || U_FAILURE(*status))
1539     {
1540         return NULL;
1541     }
1542 
1543     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1544 
1545     if (U_FAILURE(*status))
1546     {
1547         uprv_free(string);
1548         return NULL;
1549     }
1550 
1551     if(isVerbose()){
1552         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1553     }
1554 
1555     if (uprv_strlen(string) <= 0)
1556     {
1557         warning(startline, "Encountered empty integer. Default value is 0.");
1558     }
1559 
1560     /* Allow integer support for hexdecimal, octal digit and decimal*/
1561     /* and handle illegal char in the integer*/
1562     value = uprv_strtoul(string, &stopstring, 0);
1563     len=(uint32_t)(stopstring-string);
1564     if(len==uprv_strlen(string))
1565     {
1566         result = int_open(state->bundle, tag, value, comment, status);
1567     }
1568     else
1569     {
1570         *status=U_INVALID_CHAR_FOUND;
1571     }
1572     uprv_free(string);
1573 
1574     return result;
1575 }
1576 
1577 static struct SResource *
parseImport(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1578 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1579 {
1580     struct SResource *result;
1581     FileStream       *file;
1582     int32_t           len;
1583     uint8_t          *data;
1584     char             *filename;
1585     uint32_t          line;
1586     char     *fullname = NULL;
1587     filename = getInvariantString(state, &line, NULL, status);
1588 
1589     if (U_FAILURE(*status))
1590     {
1591         return NULL;
1592     }
1593 
1594     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1595 
1596     if (U_FAILURE(*status))
1597     {
1598         uprv_free(filename);
1599         return NULL;
1600     }
1601 
1602     if(isVerbose()){
1603         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1604     }
1605 
1606     /* Open the input file for reading */
1607     if (state->inputdir == NULL)
1608     {
1609 #if 1
1610         /*
1611          * Always save file file name, even if there's
1612          * no input directory specified. MIGHT BREAK SOMETHING
1613          */
1614         int32_t filenameLength = uprv_strlen(filename);
1615 
1616         fullname = (char *) uprv_malloc(filenameLength + 1);
1617         uprv_strcpy(fullname, filename);
1618 #endif
1619 
1620         file = T_FileStream_open(filename, "rb");
1621     }
1622     else
1623     {
1624 
1625         int32_t  count     = (int32_t)uprv_strlen(filename);
1626 
1627         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1628         {
1629             fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1630 
1631             /* test for NULL */
1632             if(fullname == NULL)
1633             {
1634                 *status = U_MEMORY_ALLOCATION_ERROR;
1635                 return NULL;
1636             }
1637 
1638             uprv_strcpy(fullname, state->inputdir);
1639 
1640             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1641             fullname[state->inputdirLength + 1] = '\0';
1642 
1643             uprv_strcat(fullname, filename);
1644         }
1645         else
1646         {
1647             fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1648 
1649             /* test for NULL */
1650             if(fullname == NULL)
1651             {
1652                 *status = U_MEMORY_ALLOCATION_ERROR;
1653                 return NULL;
1654             }
1655 
1656             uprv_strcpy(fullname, state->inputdir);
1657             uprv_strcat(fullname, filename);
1658         }
1659 
1660         file = T_FileStream_open(fullname, "rb");
1661 
1662     }
1663 
1664     if (file == NULL)
1665     {
1666         error(line, "couldn't open input file %s", filename);
1667         *status = U_FILE_ACCESS_ERROR;
1668         return NULL;
1669     }
1670 
1671     len  = T_FileStream_size(file);
1672     data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1673     /* test for NULL */
1674     if(data == NULL)
1675     {
1676         *status = U_MEMORY_ALLOCATION_ERROR;
1677         T_FileStream_close (file);
1678         return NULL;
1679     }
1680 
1681     /* int32_t numRead = */ T_FileStream_read  (file, data, len);
1682     T_FileStream_close (file);
1683 
1684     result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1685 
1686     uprv_free(data);
1687     uprv_free(filename);
1688     uprv_free(fullname);
1689 
1690     return result;
1691 }
1692 
1693 static struct SResource *
parseInclude(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1694 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1695 {
1696     struct SResource *result;
1697     int32_t           len=0;
1698     char             *filename;
1699     uint32_t          line;
1700     UChar *pTarget     = NULL;
1701 
1702     UCHARBUF *ucbuf;
1703     char     *fullname = NULL;
1704     int32_t  count     = 0;
1705     const char* cp = NULL;
1706     const UChar* uBuffer = NULL;
1707 
1708     filename = getInvariantString(state, &line, NULL, status);
1709     count     = (int32_t)uprv_strlen(filename);
1710 
1711     if (U_FAILURE(*status))
1712     {
1713         return NULL;
1714     }
1715 
1716     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1717 
1718     if (U_FAILURE(*status))
1719     {
1720         uprv_free(filename);
1721         return NULL;
1722     }
1723 
1724     if(isVerbose()){
1725         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1726     }
1727 
1728     fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1729     /* test for NULL */
1730     if(fullname == NULL)
1731     {
1732         *status = U_MEMORY_ALLOCATION_ERROR;
1733         uprv_free(filename);
1734         return NULL;
1735     }
1736 
1737     if(state->inputdir!=NULL){
1738         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1739         {
1740 
1741             uprv_strcpy(fullname, state->inputdir);
1742 
1743             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1744             fullname[state->inputdirLength + 1] = '\0';
1745 
1746             uprv_strcat(fullname, filename);
1747         }
1748         else
1749         {
1750             uprv_strcpy(fullname, state->inputdir);
1751             uprv_strcat(fullname, filename);
1752         }
1753     }else{
1754         uprv_strcpy(fullname,filename);
1755     }
1756 
1757     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1758 
1759     if (U_FAILURE(*status)) {
1760         error(line, "couldn't open input file %s\n", filename);
1761         return NULL;
1762     }
1763 
1764     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1765     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1766 
1767     ucbuf_close(ucbuf);
1768 
1769     uprv_free(pTarget);
1770 
1771     uprv_free(filename);
1772     uprv_free(fullname);
1773 
1774     return result;
1775 }
1776 
1777 
1778 
1779 
1780 
1781 U_STRING_DECL(k_type_string,    "string",    6);
1782 U_STRING_DECL(k_type_binary,    "binary",    6);
1783 U_STRING_DECL(k_type_bin,       "bin",       3);
1784 U_STRING_DECL(k_type_table,     "table",     5);
1785 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
1786 U_STRING_DECL(k_type_int,       "int",       3);
1787 U_STRING_DECL(k_type_integer,   "integer",   7);
1788 U_STRING_DECL(k_type_array,     "array",     5);
1789 U_STRING_DECL(k_type_alias,     "alias",     5);
1790 U_STRING_DECL(k_type_intvector, "intvector", 9);
1791 U_STRING_DECL(k_type_import,    "import",    6);
1792 U_STRING_DECL(k_type_include,   "include",   7);
1793 
1794 /* Various non-standard processing plugins that create one or more special resources. */
1795 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1796 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
1797 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
1798 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1799 
/*
 * Kinds of resource the parser distinguishes.
 *
 * The enumerator values are used directly as indexes into gResourceTypes[],
 * so the order here and the row order of that table must stay identical.
 */
typedef enum EResourceType
{
    RT_UNKNOWN = 0,             /* no explicit type given (or not determined yet) */
    RT_STRING,                  /* "string" */
    RT_BINARY,                  /* "binary" (alias "bin") */
    RT_TABLE,                   /* "table" */
    RT_TABLE_NO_FALLBACK,       /* "table(nofallback)" */
    RT_INTEGER,                 /* "integer" (alias "int") */
    RT_ARRAY,                   /* "array" */
    RT_ALIAS,                   /* "alias" */
    RT_INTVECTOR,               /* "intvector" */
    RT_IMPORT,                  /* "import" */
    RT_INCLUDE,                 /* "include" */
    RT_PROCESS_UCA_RULES,       /* "process(uca_rules)" */
    RT_PROCESS_COLLATION,       /* "process(collation)" */
    RT_PROCESS_TRANSLITERATOR,  /* "process(transliterator)" */
    RT_PROCESS_DEPENDENCY,      /* "process(dependency)" */
    RT_RESERVED                 /* end marker: upper bound of the name search in parseResourceType() */
} EResourceType;
1819 
1820 static struct {
1821     const char *nameChars;   /* only used for debugging */
1822     const UChar *nameUChars;
1823     ParseResourceFunction *parseFunction;
1824 } gResourceTypes[] = {
1825     {"Unknown", NULL, NULL},
1826     {"string", k_type_string, parseString},
1827     {"binary", k_type_binary, parseBinary},
1828     {"table", k_type_table, parseTable},
1829     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1830     {"integer", k_type_integer, parseInteger},
1831     {"array", k_type_array, parseArray},
1832     {"alias", k_type_alias, parseAlias},
1833     {"intvector", k_type_intvector, parseIntVector},
1834     {"import", k_type_import, parseImport},
1835     {"include", k_type_include, parseInclude},
1836     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1837     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1838     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1839     {"process(dependency)", k_type_plugin_dependency, parseDependency},
1840     {"reserved", NULL, NULL}
1841 };
1842 
initParser(UBool omitCollationRules)1843 void initParser(UBool omitCollationRules)
1844 {
1845     U_STRING_INIT(k_type_string,    "string",    6);
1846     U_STRING_INIT(k_type_binary,    "binary",    6);
1847     U_STRING_INIT(k_type_bin,       "bin",       3);
1848     U_STRING_INIT(k_type_table,     "table",     5);
1849     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
1850     U_STRING_INIT(k_type_int,       "int",       3);
1851     U_STRING_INIT(k_type_integer,   "integer",   7);
1852     U_STRING_INIT(k_type_array,     "array",     5);
1853     U_STRING_INIT(k_type_alias,     "alias",     5);
1854     U_STRING_INIT(k_type_intvector, "intvector", 9);
1855     U_STRING_INIT(k_type_import,    "import",    6);
1856     U_STRING_INIT(k_type_include,   "include",   7);
1857 
1858     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1859     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
1860     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
1861     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
1862 
1863     gOmitCollationRules = omitCollationRules;
1864 }
1865 
isTable(enum EResourceType type)1866 static inline UBool isTable(enum EResourceType type) {
1867     return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1868 }
1869 
1870 static enum EResourceType
parseResourceType(ParseState * state,UErrorCode * status)1871 parseResourceType(ParseState* state, UErrorCode *status)
1872 {
1873     struct UString        *tokenValue;
1874     struct UString        comment;
1875     enum   EResourceType  result = RT_UNKNOWN;
1876     uint32_t              line=0;
1877     ustr_init(&comment);
1878     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1879 
1880     if (U_FAILURE(*status))
1881     {
1882         return RT_UNKNOWN;
1883     }
1884 
1885     *status = U_ZERO_ERROR;
1886 
1887     /* Search for normal types */
1888     result=RT_UNKNOWN;
1889     while ((result=(EResourceType)(result+1)) < RT_RESERVED) {
1890         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1891             break;
1892         }
1893     }
1894     /* Now search for the aliases */
1895     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1896         result = RT_INTEGER;
1897     }
1898     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1899         result = RT_BINARY;
1900     }
1901     else if (result == RT_RESERVED) {
1902         char tokenBuffer[1024];
1903         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1904         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1905         *status = U_INVALID_FORMAT_ERROR;
1906         error(line, "unknown resource type '%s'", tokenBuffer);
1907     }
1908 
1909     return result;
1910 }
1911 
1912 /* parse a non-top-level resource */
1913 static struct SResource *
parseResource(ParseState * state,char * tag,const struct UString * comment,UErrorCode * status)1914 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1915 {
1916     enum   ETokenType      token;
1917     enum   EResourceType  resType = RT_UNKNOWN;
1918     ParseResourceFunction *parseFunction = NULL;
1919     struct UString        *tokenValue;
1920     uint32_t                 startline;
1921     uint32_t                 line;
1922 
1923 
1924     token = getToken(state, &tokenValue, NULL, &startline, status);
1925 
1926     if(isVerbose()){
1927         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1928     }
1929 
1930     /* name . [ ':' type ] '{' resource '}' */
1931     /* This function parses from the colon onwards.  If the colon is present, parse the
1932     type then try to parse a resource of that type.  If there is no explicit type,
1933     work it out using the lookahead tokens. */
1934     switch (token)
1935     {
1936     case TOK_EOF:
1937         *status = U_INVALID_FORMAT_ERROR;
1938         error(startline, "Unexpected EOF encountered");
1939         return NULL;
1940 
1941     case TOK_ERROR:
1942         *status = U_INVALID_FORMAT_ERROR;
1943         return NULL;
1944 
1945     case TOK_COLON:
1946         resType = parseResourceType(state, status);
1947         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1948 
1949         if (U_FAILURE(*status))
1950         {
1951             return NULL;
1952         }
1953 
1954         break;
1955 
1956     case TOK_OPEN_BRACE:
1957         break;
1958 
1959     default:
1960         *status = U_INVALID_FORMAT_ERROR;
1961         error(startline, "syntax error while reading a resource, expected '{' or ':'");
1962         return NULL;
1963     }
1964 
1965 
1966     if (resType == RT_UNKNOWN)
1967     {
1968         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
1969         We could have any of the following:
1970         { {         => array (nested)
1971         { :/}       => array
1972         { string ,  => string array
1973 
1974         { string {  => table
1975 
1976         { string :/{    => table
1977         { string }      => string
1978         */
1979 
1980         token = peekToken(state, 0, NULL, &line, NULL,status);
1981 
1982         if (U_FAILURE(*status))
1983         {
1984             return NULL;
1985         }
1986 
1987         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1988         {
1989             resType = RT_ARRAY;
1990         }
1991         else if (token == TOK_STRING)
1992         {
1993             token = peekToken(state, 1, NULL, &line, NULL, status);
1994 
1995             if (U_FAILURE(*status))
1996             {
1997                 return NULL;
1998             }
1999 
2000             switch (token)
2001             {
2002             case TOK_COMMA:         resType = RT_ARRAY;  break;
2003             case TOK_OPEN_BRACE:    resType = RT_TABLE;  break;
2004             case TOK_CLOSE_BRACE:   resType = RT_STRING; break;
2005             case TOK_COLON:         resType = RT_TABLE;  break;
2006             default:
2007                 *status = U_INVALID_FORMAT_ERROR;
2008                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2009                 return NULL;
2010             }
2011         }
2012         else
2013         {
2014             *status = U_INVALID_FORMAT_ERROR;
2015             error(line, "Unexpected token after '{'");
2016             return NULL;
2017         }
2018 
2019         /* printf("Type guessed as %s\n", resourceNames[resType]); */
2020     } else if(resType == RT_TABLE_NO_FALLBACK) {
2021         *status = U_INVALID_FORMAT_ERROR;
2022         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2023         return NULL;
2024     }
2025 
2026 
2027     /* We should now know what we need to parse next, so call the appropriate parser
2028     function and return. */
2029     parseFunction = gResourceTypes[resType].parseFunction;
2030     if (parseFunction != NULL) {
2031         return parseFunction(state, tag, startline, comment, status);
2032     }
2033     else {
2034         *status = U_INTERNAL_PROGRAM_ERROR;
2035         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2036     }
2037 
2038     return NULL;
2039 }
2040 
2041 /* parse the top-level resource */
2042 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,UBool makeBinaryCollation,UErrorCode * status)2043 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UBool makeBinaryCollation,
2044       UErrorCode *status)
2045 {
2046     struct UString    *tokenValue;
2047     struct UString    comment;
2048     uint32_t           line;
2049     enum EResourceType bundleType;
2050     enum ETokenType    token;
2051     ParseState state;
2052     uint32_t i;
2053 
2054 
2055     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2056     {
2057         ustr_init(&state.lookahead[i].value);
2058         ustr_init(&state.lookahead[i].comment);
2059     }
2060 
2061     initLookahead(&state, buf, status);
2062 
2063     state.inputdir       = inputDir;
2064     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2065     state.outputdir       = outputDir;
2066     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2067     state.makeBinaryCollation = makeBinaryCollation;
2068 
2069     ustr_init(&comment);
2070     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2071 
2072     state.bundle = bundle_open(&comment, FALSE, status);
2073 
2074     if (state.bundle == NULL || U_FAILURE(*status))
2075     {
2076         return NULL;
2077     }
2078 
2079 
2080     bundle_setlocale(state.bundle, tokenValue->fChars, status);
2081 
2082     /* The following code is to make Empty bundle work no matter with :table specifer or not */
2083     token = getToken(&state, NULL, NULL, &line, status);
2084     if(token==TOK_COLON) {
2085         *status=U_ZERO_ERROR;
2086         bundleType=parseResourceType(&state, status);
2087 
2088         if(isTable(bundleType))
2089         {
2090             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2091         }
2092         else
2093         {
2094             *status=U_PARSE_ERROR;
2095              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2096         }
2097     }
2098     else
2099     {
2100         /* not a colon */
2101         if(token==TOK_OPEN_BRACE)
2102         {
2103             *status=U_ZERO_ERROR;
2104             bundleType=RT_TABLE;
2105         }
2106         else
2107         {
2108             /* neither colon nor open brace */
2109             *status=U_PARSE_ERROR;
2110             bundleType=RT_UNKNOWN;
2111             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2112         }
2113     }
2114 
2115     if (U_FAILURE(*status))
2116     {
2117         bundle_close(state.bundle, status);
2118         return NULL;
2119     }
2120 
2121     if(bundleType==RT_TABLE_NO_FALLBACK) {
2122         /*
2123          * Parse a top-level table with the table(nofallback) declaration.
2124          * This is the same as a regular table, but also sets the
2125          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2126          */
2127         state.bundle->noFallback=TRUE;
2128     }
2129     /* top-level tables need not handle special table names like "collations" */
2130     realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2131     if(dependencyArray!=NULL){
2132         table_add(state.bundle->fRoot, dependencyArray, 0, status);
2133         dependencyArray = NULL;
2134     }
2135    if (U_FAILURE(*status))
2136     {
2137         bundle_close(state.bundle, status);
2138         res_close(dependencyArray);
2139         return NULL;
2140     }
2141 
2142     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2143     {
2144         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2145         if(isStrict()){
2146             *status = U_INVALID_FORMAT_ERROR;
2147             return NULL;
2148         }
2149     }
2150 
2151     cleanupLookahead(&state);
2152     ustr_deinit(&comment);
2153     return state.bundle;
2154 }
2155