• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1998-2015, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 *   Date          Name          Description
14 *   05/26/99     stephen       Creation.
15 *   02/25/00     weiv          Overhaul to write udata
16 *   5/10/01      Ram           removed ustdio dependency
17 *   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20 
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
24 #endif
25 
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 #   define UNISTR_FROM_STRING_EXPLICIT explicit
29 #endif
30 
31 #include "parse.h"
32 #include "errmsg.h"
33 #include "uhash.h"
34 #include "cmemory.h"
35 #include "cstring.h"
36 #include "uinvchar.h"
37 #include "read.h"
38 #include "ustr.h"
39 #include "reslist.h"
40 #include "rbt_pars.h"
41 #include "genrb.h"
42 #include "unicode/ustring.h"
43 #include "unicode/uscript.h"
44 #include "unicode/utf16.h"
45 #include "unicode/putil.h"
46 #include "collationbuilder.h"
47 #include "collationdata.h"
48 #include "collationdatareader.h"
49 #include "collationdatawriter.h"
50 #include "collationfastlatinbuilder.h"
51 #include "collationinfo.h"
52 #include "collationroot.h"
53 #include "collationruleparser.h"
54 #include "collationtailoring.h"
55 #include <stdio.h>
56 
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code points tested for while reading rule files */
#define CR               0x000D   /* carriage return */
#define LF               0x000A   /* line feed */
#define SPACE            0x0020   /* ' ' */
#define TAB              0x0009   /* horizontal tab */
#define ESCAPE           0x005C   /* backslash */
#define HASH             0x0023   /* '#' */
#define QUOTE            0x0027   /* apostrophe */
#define ZERO             0x0030   /* '0' */
#define STARTCOMMAND     0x005B   /* '[' */
#define ENDCOMMAND       0x005D   /* ']' */
#define OPENSQBRACKET    0x005B   /* '[' */
#define CLOSESQBRACKET   0x005D   /* ']' */
72 
73 using icu::LocalPointer;
74 using icu::UnicodeString;
75 
76 struct Lookahead
77 {
78      enum   ETokenType type;
79      struct UString    value;
80      struct UString    comment;
81      uint32_t          line;
82 };
83 
84 /* keep in sync with token defines in read.h */
85 const char *tokenNames[TOK_TOKEN_COUNT] =
86 {
87      "string",             /* A string token, such as "MonthNames" */
88      "'{'",                 /* An opening brace character */
89      "'}'",                 /* A closing brace character */
90      "','",                 /* A comma */
91      "':'",                 /* A colon */
92 
93      "<end of file>",     /* End of the file has been reached successfully */
94      "<end of line>"
95 };
96 
97 /* Just to store "TRUE" */
98 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
99 
100 typedef struct {
101     struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
102     uint32_t          lookaheadPosition;
103     UCHARBUF         *buffer;
104     struct SRBRoot *bundle;
105     const char     *inputdir;
106     uint32_t        inputdirLength;
107     const char     *outputdir;
108     uint32_t        outputdirLength;
109     const char     *filename;
110     UBool           makeBinaryCollation;
111     UBool           omitCollationRules;
112 } ParseState;
113 
114 typedef struct SResource *
115 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
116 
117 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
118 
119 /* The nature of the lookahead buffer:
120    There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
121    MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
122    When getToken is called, the current pointer is moved to the next slot and the
123    old slot is filled with the next token from the reader by calling getNextToken.
124    The token values are stored in the slot, which means that token values don't
125    survive a call to getToken, ie.
126 
127    UString *value;
128 
129    getToken(&value, NULL, status);
130    getToken(NULL,   NULL, status);       bad - value is now a different string
131 */
132 static void
initLookahead(ParseState * state,UCHARBUF * buf,UErrorCode * status)133 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
134 {
135     static uint32_t initTypeStrings = 0;
136     uint32_t i;
137 
138     if (!initTypeStrings)
139     {
140         initTypeStrings = 1;
141     }
142 
143     state->lookaheadPosition   = 0;
144     state->buffer              = buf;
145 
146     resetLineNumber();
147 
148     for (i = 0; i < MAX_LOOKAHEAD; i++)
149     {
150         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
151         if (U_FAILURE(*status))
152         {
153             return;
154         }
155     }
156 
157     *status = U_ZERO_ERROR;
158 }
159 
160 static void
cleanupLookahead(ParseState * state)161 cleanupLookahead(ParseState* state)
162 {
163     uint32_t i;
164     for (i = 0; i <= MAX_LOOKAHEAD; i++)
165     {
166         ustr_deinit(&state->lookahead[i].value);
167         ustr_deinit(&state->lookahead[i].comment);
168     }
169 
170 }
171 
172 static enum ETokenType
getToken(ParseState * state,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)173 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
174 {
175     enum ETokenType result;
176     uint32_t          i;
177 
178     result = state->lookahead[state->lookaheadPosition].type;
179 
180     if (tokenValue != NULL)
181     {
182         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
183     }
184 
185     if (linenumber != NULL)
186     {
187         *linenumber = state->lookahead[state->lookaheadPosition].line;
188     }
189 
190     if (comment != NULL)
191     {
192         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
193     }
194 
195     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
196     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
197     ustr_setlen(&state->lookahead[i].comment, 0, status);
198     ustr_setlen(&state->lookahead[i].value, 0, status);
199     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
200 
201     /* printf("getToken, returning %s\n", tokenNames[result]); */
202 
203     return result;
204 }
205 
206 static enum ETokenType
peekToken(ParseState * state,uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)207 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
208 {
209     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
210 
211     if (U_FAILURE(*status))
212     {
213         return TOK_ERROR;
214     }
215 
216     if (lookaheadCount >= MAX_LOOKAHEAD)
217     {
218         *status = U_INTERNAL_PROGRAM_ERROR;
219         return TOK_ERROR;
220     }
221 
222     if (tokenValue != NULL)
223     {
224         *tokenValue = &state->lookahead[i].value;
225     }
226 
227     if (linenumber != NULL)
228     {
229         *linenumber = state->lookahead[i].line;
230     }
231 
232     if(comment != NULL){
233         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
234     }
235 
236     return state->lookahead[i].type;
237 }
238 
239 static void
expect(ParseState * state,enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)240 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
241 {
242     uint32_t        line;
243 
244     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
245 
246     if (linenumber != NULL)
247     {
248         *linenumber = line;
249     }
250 
251     if (U_FAILURE(*status))
252     {
253         return;
254     }
255 
256     if (token != expectedToken)
257     {
258         *status = U_INVALID_FORMAT_ERROR;
259         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
260     }
261     else
262     {
263         *status = U_ZERO_ERROR;
264     }
265 }
266 
getInvariantString(ParseState * state,uint32_t * line,struct UString * comment,UErrorCode * status)267 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
268 {
269     struct UString *tokenValue;
270     char           *result;
271     uint32_t        count;
272 
273     expect(state, TOK_STRING, &tokenValue, comment, line, status);
274 
275     if (U_FAILURE(*status))
276     {
277         return NULL;
278     }
279 
280     count = u_strlen(tokenValue->fChars);
281     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
282         *status = U_INVALID_FORMAT_ERROR;
283         error(*line, "invariant characters required for table keys, binary data, etc.");
284         return NULL;
285     }
286 
287     result = static_cast<char *>(uprv_malloc(count+1));
288 
289     if (result == NULL)
290     {
291         *status = U_MEMORY_ALLOCATION_ERROR;
292         return NULL;
293     }
294 
295     u_UCharsToChars(tokenValue->fChars, result, count+1);
296     return result;
297 }
298 
299 static struct SResource *
parseUCARules(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)300 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
301 {
302     struct SResource *result = NULL;
303     struct UString   *tokenValue;
304     FileStream       *file          = NULL;
305     char              filename[256] = { '\0' };
306     char              cs[128]       = { '\0' };
307     uint32_t          line;
308     UBool quoted = FALSE;
309     UCHARBUF *ucbuf=NULL;
310     UChar32   c     = 0;
311     const char* cp  = NULL;
312     UChar *pTarget     = NULL;
313     UChar *target      = NULL;
314     UChar *targetLimit = NULL;
315     int32_t size = 0;
316 
317     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
318 
319     if(isVerbose()){
320         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
321     }
322 
323     if (U_FAILURE(*status))
324     {
325         return NULL;
326     }
327     /* make the filename including the directory */
328     if (state->inputdir != NULL)
329     {
330         uprv_strcat(filename, state->inputdir);
331 
332         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
333         {
334             uprv_strcat(filename, U_FILE_SEP_STRING);
335         }
336     }
337 
338     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
339 
340     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
341 
342     if (U_FAILURE(*status))
343     {
344         return NULL;
345     }
346     uprv_strcat(filename, cs);
347 
348     if(state->omitCollationRules) {
349         return res_none();
350     }
351 
352     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
353 
354     if (U_FAILURE(*status)) {
355         error(line, "An error occured while opening the input file %s\n", filename);
356         return NULL;
357     }
358 
359     /* We allocate more space than actually required
360     * since the actual size needed for storing UChars
361     * is not known in UTF-8 byte stream
362     */
363     size        = ucbuf_size(ucbuf) + 1;
364     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
365     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
366     target      = pTarget;
367     targetLimit = pTarget+size;
368 
369     /* read the rules into the buffer */
370     while (target < targetLimit)
371     {
372         c = ucbuf_getc(ucbuf, status);
373         if(c == QUOTE) {
374             quoted = (UBool)!quoted;
375         }
376         /* weiv (06/26/2002): adding the following:
377          * - preserving spaces in commands [...]
378          * - # comments until the end of line
379          */
380         if (c == STARTCOMMAND && !quoted)
381         {
382             /* preserve commands
383              * closing bracket will be handled by the
384              * append at the end of the loop
385              */
386             while(c != ENDCOMMAND) {
387                 U_APPEND_CHAR32_ONLY(c, target);
388                 c = ucbuf_getc(ucbuf, status);
389             }
390         }
391         else if (c == HASH && !quoted) {
392             /* skip comments */
393             while(c != CR && c != LF) {
394                 c = ucbuf_getc(ucbuf, status);
395             }
396             continue;
397         }
398         else if (c == ESCAPE)
399         {
400             c = unescape(ucbuf, status);
401 
402             if (c == (UChar32)U_ERR)
403             {
404                 uprv_free(pTarget);
405                 T_FileStream_close(file);
406                 return NULL;
407             }
408         }
409         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
410         {
411             /* ignore spaces carriage returns
412             * and line feed unless in the form \uXXXX
413             */
414             continue;
415         }
416 
417         /* Append UChar * after dissembling if c > 0xffff*/
418         if (c != (UChar32)U_EOF)
419         {
420             U_APPEND_CHAR32_ONLY(c, target);
421         }
422         else
423         {
424             break;
425         }
426     }
427 
428     /* terminate the string */
429     if(target < targetLimit){
430         *target = 0x0000;
431     }
432 
433     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
434 
435 
436     ucbuf_close(ucbuf);
437     uprv_free(pTarget);
438     T_FileStream_close(file);
439 
440     return result;
441 }
442 
443 static struct SResource *
parseTransliterator(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)444 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
445 {
446     struct SResource *result = NULL;
447     struct UString   *tokenValue;
448     FileStream       *file          = NULL;
449     char              filename[256] = { '\0' };
450     char              cs[128]       = { '\0' };
451     uint32_t          line;
452     UCHARBUF *ucbuf=NULL;
453     const char* cp  = NULL;
454     UChar *pTarget     = NULL;
455     const UChar *pSource     = NULL;
456     int32_t size = 0;
457 
458     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
459 
460     if(isVerbose()){
461         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
462     }
463 
464     if (U_FAILURE(*status))
465     {
466         return NULL;
467     }
468     /* make the filename including the directory */
469     if (state->inputdir != NULL)
470     {
471         uprv_strcat(filename, state->inputdir);
472 
473         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
474         {
475             uprv_strcat(filename, U_FILE_SEP_STRING);
476         }
477     }
478 
479     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
480 
481     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
482 
483     if (U_FAILURE(*status))
484     {
485         return NULL;
486     }
487     uprv_strcat(filename, cs);
488 
489 
490     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
491 
492     if (U_FAILURE(*status)) {
493         error(line, "An error occured while opening the input file %s\n", filename);
494         return NULL;
495     }
496 
497     /* We allocate more space than actually required
498     * since the actual size needed for storing UChars
499     * is not known in UTF-8 byte stream
500     */
501     pSource = ucbuf_getBuffer(ucbuf, &size, status);
502     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
503     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
504 
505 #if !UCONFIG_NO_TRANSLITERATION
506     size = utrans_stripRules(pSource, size, pTarget, status);
507 #else
508     size = 0;
509     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
510 #endif
511     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
512 
513     ucbuf_close(ucbuf);
514     uprv_free(pTarget);
515     T_FileStream_close(file);
516 
517     return result;
518 }
519 static struct SResource* dependencyArray = NULL;
520 
521 static struct SResource *
parseDependency(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)522 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
523 {
524     struct SResource *result = NULL;
525     struct SResource *elem = NULL;
526     struct UString   *tokenValue;
527     uint32_t          line;
528     char              filename[256] = { '\0' };
529     char              cs[128]       = { '\0' };
530 
531     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
532 
533     if(isVerbose()){
534         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
535     }
536 
537     if (U_FAILURE(*status))
538     {
539         return NULL;
540     }
541     /* make the filename including the directory */
542     if (state->outputdir != NULL)
543     {
544         uprv_strcat(filename, state->outputdir);
545 
546         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
547         {
548             uprv_strcat(filename, U_FILE_SEP_STRING);
549         }
550     }
551 
552     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
553 
554     if (U_FAILURE(*status))
555     {
556         return NULL;
557     }
558     uprv_strcat(filename, cs);
559     if(!T_FileStream_file_exists(filename)){
560         if(isStrict()){
561             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
562         }else{
563             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
564         }
565     }
566     if(dependencyArray==NULL){
567         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
568     }
569     if(tag!=NULL){
570         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
571     }
572     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
573 
574     array_add(dependencyArray, elem, status);
575 
576     if (U_FAILURE(*status))
577     {
578         return NULL;
579     }
580     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
581     return result;
582 }
583 static struct SResource *
parseString(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)584 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
585 {
586     struct UString   *tokenValue;
587     struct SResource *result = NULL;
588 
589 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
590     {
591         return parseUCARules(tag, startline, status);
592     }*/
593     if(isVerbose()){
594         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
595     }
596     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
597 
598     if (U_SUCCESS(*status))
599     {
600         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601         doesn't survive expect either) */
602 
603         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
604         if(U_SUCCESS(*status) && result) {
605             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
606 
607             if (U_FAILURE(*status))
608             {
609                 res_close(result);
610                 return NULL;
611             }
612         }
613     }
614 
615     return result;
616 }
617 
618 static struct SResource *
parseAlias(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)619 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
620 {
621     struct UString   *tokenValue;
622     struct SResource *result  = NULL;
623 
624     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
625 
626     if(isVerbose()){
627         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
628     }
629 
630     if (U_SUCCESS(*status))
631     {
632         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633         doesn't survive expect either) */
634 
635         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
636 
637         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
638 
639         if (U_FAILURE(*status))
640         {
641             res_close(result);
642             return NULL;
643         }
644     }
645 
646     return result;
647 }
648 
649 #if !UCONFIG_NO_COLLATION
650 
651 namespace {
652 
resLookup(struct SResource * res,const char * key)653 static struct SResource* resLookup(struct SResource* res, const char* key){
654     struct SResource *current = NULL;
655     struct SResTable *list;
656     if (res == res_none()) {
657         return NULL;
658     }
659 
660     list = &(res->u.fTable);
661 
662     current = list->fFirst;
663     while (current != NULL) {
664         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
665             return current;
666         }
667         current = current->fNext;
668     }
669     return NULL;
670 }
671 
672 class GenrbImporter : public icu::CollationRuleParser::Importer {
673 public:
GenrbImporter(const char * in,const char * out)674     GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
675     virtual ~GenrbImporter();
676     virtual void getRules(
677             const char *localeID, const char *collationType,
678             UnicodeString &rules,
679             const char *&errorReason, UErrorCode &errorCode);
680 
681 private:
682     const char *inputDir;
683     const char *outputDir;
684 };
685 
~GenrbImporter()686 GenrbImporter::~GenrbImporter() {}
687 
688 void
getRules(const char * localeID,const char * collationType,UnicodeString & rules,const char * &,UErrorCode & errorCode)689 GenrbImporter::getRules(
690         const char *localeID, const char *collationType,
691         UnicodeString &rules,
692         const char *& /*errorReason*/, UErrorCode &errorCode) {
693     struct SRBRoot *data         = NULL;
694     UCHARBUF       *ucbuf        = NULL;
695     int localeLength = strlen(localeID);
696     char* filename = (char*)uprv_malloc(localeLength+5);
697     char           *inputDirBuf  = NULL;
698     char           *openFileName = NULL;
699     const char* cp = "";
700     int32_t i = 0;
701     int32_t dirlen  = 0;
702     int32_t filelen = 0;
703     struct SResource* root;
704     struct SResource* collations;
705     struct SResource* collation;
706     struct SResource* sequence;
707 
708     memcpy(filename, localeID, localeLength);
709     for(i = 0; i < localeLength; i++){
710         if(filename[i] == '-'){
711             filename[i] = '_';
712         }
713     }
714     filename[localeLength]   = '.';
715     filename[localeLength+1] = 't';
716     filename[localeLength+2] = 'x';
717     filename[localeLength+3] = 't';
718     filename[localeLength+4] = 0;
719 
720 
721     if (U_FAILURE(errorCode)) {
722         return;
723     }
724     if(filename==NULL){
725         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
726         return;
727     }else{
728         filelen = (int32_t)uprv_strlen(filename);
729     }
730     if(inputDir == NULL) {
731         const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
732         openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
733         openFileName[0] = '\0';
734         if (filenameBegin != NULL) {
735             /*
736              * When a filename ../../../data/root.txt is specified,
737              * we presume that the input directory is ../../../data
738              * This is very important when the resource file includes
739              * another file, like UCARules.txt or thaidict.brk.
740              */
741             int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
742             inputDirBuf = (char *)uprv_malloc(filenameSize);
743 
744             /* test for NULL */
745             if(inputDirBuf == NULL) {
746                 errorCode = U_MEMORY_ALLOCATION_ERROR;
747                 goto finish;
748             }
749 
750             uprv_strncpy(inputDirBuf, filename, filenameSize);
751             inputDirBuf[filenameSize - 1] = 0;
752             inputDir = inputDirBuf;
753             dirlen  = (int32_t)uprv_strlen(inputDir);
754         }
755     }else{
756         dirlen  = (int32_t)uprv_strlen(inputDir);
757 
758         if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
759             openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
760 
761             /* test for NULL */
762             if(openFileName == NULL) {
763                 errorCode = U_MEMORY_ALLOCATION_ERROR;
764                 goto finish;
765             }
766 
767             openFileName[0] = '\0';
768             /*
769              * append the input dir to openFileName if the first char in
770              * filename is not file seperation char and the last char input directory is  not '.'.
771              * This is to support :
772              * genrb -s. /home/icu/data
773              * genrb -s. icu/data
774              * The user cannot mix notations like
775              * genrb -s. /icu/data --- the absolute path specified. -s redundant
776              * user should use
777              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
778              */
779             if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
780                 uprv_strcpy(openFileName, inputDir);
781                 openFileName[dirlen]     = U_FILE_SEP_CHAR;
782             }
783             openFileName[dirlen + 1] = '\0';
784         } else {
785             openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
786 
787             /* test for NULL */
788             if(openFileName == NULL) {
789                 errorCode = U_MEMORY_ALLOCATION_ERROR;
790                 goto finish;
791             }
792 
793             uprv_strcpy(openFileName, inputDir);
794 
795         }
796     }
797     uprv_strcat(openFileName, filename);
798     /* printf("%s\n", openFileName);  */
799     errorCode = U_ZERO_ERROR;
800     ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
801 
802     if(errorCode == U_FILE_ACCESS_ERROR) {
803 
804         fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
805         goto finish;
806     }
807     if (ucbuf == NULL || U_FAILURE(errorCode)) {
808         fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(errorCode));
809         goto finish;
810     }
811 
812     /* Parse the data into an SRBRoot */
813     data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
814     if (U_FAILURE(errorCode)) {
815         goto finish;
816     }
817 
818     root = data->fRoot;
819     collations = resLookup(root, "collations");
820     if (collations != NULL) {
821       collation = resLookup(collations, collationType);
822       if (collation != NULL) {
823         sequence = resLookup(collation, "Sequence");
824         if (sequence != NULL) {
825           // No string pointer aliasing so that we need not hold onto the resource bundle.
826           rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength);
827         }
828       }
829     }
830 
831 finish:
832     if (inputDirBuf != NULL) {
833         uprv_free(inputDirBuf);
834     }
835 
836     if (openFileName != NULL) {
837         uprv_free(openFileName);
838     }
839 
840     if(ucbuf) {
841         ucbuf_close(ucbuf);
842     }
843 }
844 
845 // Quick-and-dirty escaping function.
846 // Assumes that we are on an ASCII-based platform.
847 static void
escape(const UChar * s,char * buffer)848 escape(const UChar *s, char *buffer) {
849     int32_t length = u_strlen(s);
850     int32_t i = 0;
851     for (;;) {
852         UChar32 c;
853         U16_NEXT(s, i, length, c);
854         if (c == 0) {
855             *buffer = 0;
856             return;
857         } else if (0x20 <= c && c <= 0x7e) {
858             // printable ASCII
859             *buffer++ = (char)c;  // assumes ASCII-based platform
860         } else {
861             buffer += sprintf(buffer, "\\u%04X", (int)c);
862         }
863     }
864 }
865 
866 }  // namespace
867 
868 #endif  // !UCONFIG_NO_COLLATION
869 
870 static struct SResource *
addCollation(ParseState * state,struct SResource * result,const char * collationType,uint32_t startline,UErrorCode * status)871 addCollation(ParseState* state, struct SResource  *result, const char *collationType,
872              uint32_t startline, UErrorCode *status)
873 {
874     // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
875     struct SResource  *member = NULL;
876     struct UString    *tokenValue;
877     struct UString     comment;
878     enum   ETokenType  token;
879     char               subtag[1024];
880     UnicodeString      rules;
881     UBool              haveRules = FALSE;
882     UVersionInfo       version;
883     uint32_t           line;
884 
885     /* '{' . (name resource)* '}' */
886     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
887 
888     for (;;)
889     {
890         ustr_init(&comment);
891         token = getToken(state, &tokenValue, &comment, &line, status);
892 
893         if (token == TOK_CLOSE_BRACE)
894         {
895             break;
896         }
897 
898         if (token != TOK_STRING)
899         {
900             res_close(result);
901             *status = U_INVALID_FORMAT_ERROR;
902 
903             if (token == TOK_EOF)
904             {
905                 error(startline, "unterminated table");
906             }
907             else
908             {
909                 error(line, "Unexpected token %s", tokenNames[token]);
910             }
911 
912             return NULL;
913         }
914 
915         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
916 
917         if (U_FAILURE(*status))
918         {
919             res_close(result);
920             return NULL;
921         }
922 
923         member = parseResource(state, subtag, NULL, status);
924 
925         if (U_FAILURE(*status))
926         {
927             res_close(result);
928             return NULL;
929         }
930         if (result == NULL)
931         {
932             // Ignore the parsed resources, continue parsing.
933         }
934         else if (uprv_strcmp(subtag, "Version") == 0)
935         {
936             char     ver[40];
937             int32_t length = member->u.fString.fLength;
938 
939             if (length >= (int32_t) sizeof(ver))
940             {
941                 length = (int32_t) sizeof(ver) - 1;
942             }
943 
944             u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
945             u_versionFromString(version, ver);
946 
947             table_add(result, member, line, status);
948             member = NULL;
949         }
950         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
951         {
952             /* discard duplicate %%CollationBin if any*/
953         }
954         else if (uprv_strcmp(subtag, "Sequence") == 0)
955         {
956             rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
957             haveRules = TRUE;
958             // Defer building the collator until we have seen
959             // all sub-elements of the collation table, including the Version.
960             /* in order to achieve smaller data files, we can direct genrb */
961             /* to omit collation rules */
962             if(!state->omitCollationRules) {
963                 table_add(result, member, line, status);
964                 member = NULL;
965             }
966         }
967         else  // Just copy non-special items.
968         {
969             table_add(result, member, line, status);
970             member = NULL;
971         }
972         res_close(member);  // TODO: use LocalPointer
973         if (U_FAILURE(*status))
974         {
975             res_close(result);
976             return NULL;
977         }
978     }
979 
980     if (!haveRules) { return result; }
981 
982 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
983     warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
984     (void)collationType;
985 #else
986     // CLDR ticket #3949, ICU ticket #8082:
987     // Do not build collation binary data for for-import-only "private" collation rule strings.
988     if (uprv_strncmp(collationType, "private-", 8) == 0) {
989         if(isVerbose()) {
990             printf("Not building %s~%s collation binary\n", state->filename, collationType);
991         }
992         return result;
993     }
994 
995     if(!state->makeBinaryCollation) {
996         if(isVerbose()) {
997             printf("Not building %s~%s collation binary\n", state->filename, collationType);
998         }
999         return result;
1000     }
1001     UErrorCode intStatus = U_ZERO_ERROR;
1002     UParseError parseError;
1003     uprv_memset(&parseError, 0, sizeof(parseError));
1004     GenrbImporter importer(state->inputdir, state->outputdir);
1005     const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
1006     if(U_FAILURE(intStatus)) {
1007         error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
1008         res_close(result);
1009         return NULL;  // TODO: use LocalUResourceBundlePointer for result
1010     }
1011     icu::CollationBuilder builder(base, intStatus);
1012     if(uprv_strncmp(collationType, "search", 6) == 0) {
1013         builder.disableFastLatin();  // build fast-Latin table unless search collator
1014     }
1015     LocalPointer<icu::CollationTailoring> t(
1016             builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
1017     if(U_FAILURE(intStatus)) {
1018         const char *reason = builder.getErrorReason();
1019         if(reason == NULL) { reason = ""; }
1020         error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s",
1021                 state->filename, collationType,
1022                 (long)parseError.offset, u_errorName(intStatus), reason);
1023         if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1024             // Print pre- and post-context.
1025             char preBuffer[100], postBuffer[100];
1026             escape(parseError.preContext, preBuffer);
1027             escape(parseError.postContext, postBuffer);
1028             error(line, "  error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
1029         }
1030         if(isStrict()) {
1031             *status = intStatus;
1032             res_close(result);
1033             return NULL;
1034         }
1035     }
1036     icu::LocalMemory<uint8_t> buffer;
1037     int32_t capacity = 100000;
1038     uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
1039     if(dest == NULL) {
1040         fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1041                 (long)capacity);
1042         *status = U_MEMORY_ALLOCATION_ERROR;
1043         res_close(result);
1044         return NULL;
1045     }
1046     int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
1047     int32_t totalSize = icu::CollationDataWriter::writeTailoring(
1048             *t, *t->settings, indexes, dest, capacity, intStatus);
1049     if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
1050         intStatus = U_ZERO_ERROR;
1051         capacity = totalSize;
1052         dest = buffer.allocateInsteadAndCopy(capacity);
1053         if(dest == NULL) {
1054             fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1055                     (long)capacity);
1056             *status = U_MEMORY_ALLOCATION_ERROR;
1057             res_close(result);
1058             return NULL;
1059         }
1060         totalSize = icu::CollationDataWriter::writeTailoring(
1061                 *t, *t->settings, indexes, dest, capacity, intStatus);
1062     }
1063     if(U_FAILURE(intStatus)) {
1064         fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1065                 u_errorName(intStatus));
1066         res_close(result);
1067         return NULL;
1068     }
1069     if(isVerbose()) {
1070         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1071         icu::CollationInfo::printSizes(totalSize, indexes);
1072         if(t->settings->hasReordering()) {
1073             printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1074             icu::CollationInfo::printReorderRanges(
1075                     *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1076         }
1077     }
1078     struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1079     table_add(result, collationBin, line, status);
1080     if (U_FAILURE(*status)) {
1081         res_close(result);
1082         return NULL;
1083     }
1084 #endif
1085     return result;
1086 }
1087 
1088 static UBool
keepCollationType(const char * type)1089 keepCollationType(const char *type) {  // android-changed
1090     // BEGIN android-added
1091     if (uprv_strcmp(type, "big5han") == 0) { return FALSE; }
1092     if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; }
1093     // END android-added
1094     return TRUE;
1095 }
1096 
1097 static struct SResource *
parseCollationElements(ParseState * state,char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)1098 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1099 {
1100     struct SResource  *result = NULL;
1101     struct SResource  *member = NULL;
1102     struct SResource  *collationRes = NULL;
1103     struct UString    *tokenValue;
1104     struct UString     comment;
1105     enum   ETokenType  token;
1106     char               subtag[1024], typeKeyword[1024];
1107     uint32_t           line;
1108 
1109     result = table_open(state->bundle, tag, NULL, status);
1110 
1111     if (result == NULL || U_FAILURE(*status))
1112     {
1113         return NULL;
1114     }
1115     if(isVerbose()){
1116         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1117     }
1118     if(!newCollation) {
1119         return addCollation(state, result, "(no type)", startline, status);
1120     }
1121     else {
1122         for(;;) {
1123             ustr_init(&comment);
1124             token = getToken(state, &tokenValue, &comment, &line, status);
1125 
1126             if (token == TOK_CLOSE_BRACE)
1127             {
1128                 return result;
1129             }
1130 
1131             if (token != TOK_STRING)
1132             {
1133                 res_close(result);
1134                 *status = U_INVALID_FORMAT_ERROR;
1135 
1136                 if (token == TOK_EOF)
1137                 {
1138                     error(startline, "unterminated table");
1139                 }
1140                 else
1141                 {
1142                     error(line, "Unexpected token %s", tokenNames[token]);
1143                 }
1144 
1145                 return NULL;
1146             }
1147 
1148             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1149 
1150             if (U_FAILURE(*status))
1151             {
1152                 res_close(result);
1153                 return NULL;
1154             }
1155 
1156             if (uprv_strcmp(subtag, "default") == 0)
1157             {
1158                 member = parseResource(state, subtag, NULL, status);
1159 
1160                 if (U_FAILURE(*status))
1161                 {
1162                     res_close(result);
1163                     return NULL;
1164                 }
1165 
1166                 table_add(result, member, line, status);
1167             }
1168             else
1169             {
1170                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1171                 /* this probably needs to be refactored or recursively use the parser */
1172                 /* first we assume that our collation table won't have the explicit type */
1173                 /* then, we cannot handle aliases */
1174                 if(token == TOK_OPEN_BRACE) {
1175                     token = getToken(state, &tokenValue, &comment, &line, status);
1176                     if (keepCollationType(subtag)) {
1177                         collationRes = table_open(state->bundle, subtag, NULL, status);
1178                     } else {
1179                         collationRes = NULL;
1180                     }
1181                     // need to parse the collation data regardless
1182                     collationRes = addCollation(state, collationRes, subtag, startline, status);
1183                     if (collationRes != NULL) {
1184                         table_add(result, collationRes, startline, status);
1185                     }
1186                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1187                     /* we could have a table too */
1188                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1189                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1190                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
1191                         member = parseResource(state, subtag, NULL, status);
1192                         if (U_FAILURE(*status))
1193                         {
1194                             res_close(result);
1195                             return NULL;
1196                         }
1197 
1198                         table_add(result, member, line, status);
1199                     } else {
1200                         res_close(result);
1201                         *status = U_INVALID_FORMAT_ERROR;
1202                         return NULL;
1203                     }
1204                 } else {
1205                     res_close(result);
1206                     *status = U_INVALID_FORMAT_ERROR;
1207                     return NULL;
1208                 }
1209             }
1210 
1211             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1212 
1213             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1214 
1215             if (U_FAILURE(*status))
1216             {
1217                 res_close(result);
1218                 return NULL;
1219             }
1220         }
1221     }
1222 }
1223 
1224 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1225    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1226 static struct SResource *
realParseTable(ParseState * state,struct SResource * table,char * tag,uint32_t startline,UErrorCode * status)1227 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1228 {
1229     struct SResource  *member = NULL;
1230     struct UString    *tokenValue=NULL;
1231     struct UString    comment;
1232     enum   ETokenType token;
1233     char              subtag[1024];
1234     uint32_t          line;
1235     UBool             readToken = FALSE;
1236 
1237     /* '{' . (name resource)* '}' */
1238 
1239     if(isVerbose()){
1240         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1241     }
1242     for (;;)
1243     {
1244         ustr_init(&comment);
1245         token = getToken(state, &tokenValue, &comment, &line, status);
1246 
1247         if (token == TOK_CLOSE_BRACE)
1248         {
1249             if (!readToken) {
1250                 warning(startline, "Encountered empty table");
1251             }
1252             return table;
1253         }
1254 
1255         if (token != TOK_STRING)
1256         {
1257             *status = U_INVALID_FORMAT_ERROR;
1258 
1259             if (token == TOK_EOF)
1260             {
1261                 error(startline, "unterminated table");
1262             }
1263             else
1264             {
1265                 error(line, "unexpected token %s", tokenNames[token]);
1266             }
1267 
1268             return NULL;
1269         }
1270 
1271         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1272             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1273         } else {
1274             *status = U_INVALID_FORMAT_ERROR;
1275             error(line, "invariant characters required for table keys");
1276             return NULL;
1277         }
1278 
1279         if (U_FAILURE(*status))
1280         {
1281             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1282             return NULL;
1283         }
1284 
1285         member = parseResource(state, subtag, &comment, status);
1286 
1287         if (member == NULL || U_FAILURE(*status))
1288         {
1289             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1290             return NULL;
1291         }
1292 
1293         table_add(table, member, line, status);
1294 
1295         if (U_FAILURE(*status))
1296         {
1297             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1298             return NULL;
1299         }
1300         readToken = TRUE;
1301         ustr_deinit(&comment);
1302    }
1303 
1304     /* not reached */
1305     /* A compiler warning will appear if all paths don't contain a return statement. */
1306 /*     *status = U_INTERNAL_PROGRAM_ERROR;
1307      return NULL;*/
1308 }
1309 
1310 static struct SResource *
parseTable(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1311 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1312 {
1313     struct SResource *result;
1314 
1315     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1316     {
1317         return parseCollationElements(state, tag, startline, FALSE, status);
1318     }
1319     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1320     {
1321         return parseCollationElements(state, tag, startline, TRUE, status);
1322     }
1323     if(isVerbose()){
1324         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1325     }
1326 
1327     result = table_open(state->bundle, tag, comment, status);
1328 
1329     if (result == NULL || U_FAILURE(*status))
1330     {
1331         return NULL;
1332     }
1333     return realParseTable(state, result, tag, startline,  status);
1334 }
1335 
1336 static struct SResource *
parseArray(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1337 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1338 {
1339     struct SResource  *result = NULL;
1340     struct SResource  *member = NULL;
1341     struct UString    *tokenValue;
1342     struct UString    memberComments;
1343     enum   ETokenType token;
1344     UBool             readToken = FALSE;
1345 
1346     result = array_open(state->bundle, tag, comment, status);
1347 
1348     if (result == NULL || U_FAILURE(*status))
1349     {
1350         return NULL;
1351     }
1352     if(isVerbose()){
1353         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1354     }
1355 
1356     ustr_init(&memberComments);
1357 
1358     /* '{' . resource [','] '}' */
1359     for (;;)
1360     {
1361         /* reset length */
1362         ustr_setlen(&memberComments, 0, status);
1363 
1364         /* check for end of array, but don't consume next token unless it really is the end */
1365         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1366 
1367 
1368         if (token == TOK_CLOSE_BRACE)
1369         {
1370             getToken(state, NULL, NULL, NULL, status);
1371             if (!readToken) {
1372                 warning(startline, "Encountered empty array");
1373             }
1374             break;
1375         }
1376 
1377         if (token == TOK_EOF)
1378         {
1379             res_close(result);
1380             *status = U_INVALID_FORMAT_ERROR;
1381             error(startline, "unterminated array");
1382             return NULL;
1383         }
1384 
1385         /* string arrays are a special case */
1386         if (token == TOK_STRING)
1387         {
1388             getToken(state, &tokenValue, &memberComments, NULL, status);
1389             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1390         }
1391         else
1392         {
1393             member = parseResource(state, NULL, &memberComments, status);
1394         }
1395 
1396         if (member == NULL || U_FAILURE(*status))
1397         {
1398             res_close(result);
1399             return NULL;
1400         }
1401 
1402         array_add(result, member, status);
1403 
1404         if (U_FAILURE(*status))
1405         {
1406             res_close(result);
1407             return NULL;
1408         }
1409 
1410         /* eat optional comma if present */
1411         token = peekToken(state, 0, NULL, NULL, NULL, status);
1412 
1413         if (token == TOK_COMMA)
1414         {
1415             getToken(state, NULL, NULL, NULL, status);
1416         }
1417 
1418         if (U_FAILURE(*status))
1419         {
1420             res_close(result);
1421             return NULL;
1422         }
1423         readToken = TRUE;
1424     }
1425 
1426     ustr_deinit(&memberComments);
1427     return result;
1428 }
1429 
1430 static struct SResource *
parseIntVector(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1431 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1432 {
1433     struct SResource  *result = NULL;
1434     enum   ETokenType  token;
1435     char              *string;
1436     int32_t            value;
1437     UBool              readToken = FALSE;
1438     char              *stopstring;
1439     uint32_t           len;
1440     struct UString     memberComments;
1441 
1442     result = intvector_open(state->bundle, tag, comment, status);
1443 
1444     if (result == NULL || U_FAILURE(*status))
1445     {
1446         return NULL;
1447     }
1448 
1449     if(isVerbose()){
1450         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1451     }
1452     ustr_init(&memberComments);
1453     /* '{' . string [','] '}' */
1454     for (;;)
1455     {
1456         ustr_setlen(&memberComments, 0, status);
1457 
1458         /* check for end of array, but don't consume next token unless it really is the end */
1459         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1460 
1461         if (token == TOK_CLOSE_BRACE)
1462         {
1463             /* it's the end, consume the close brace */
1464             getToken(state, NULL, NULL, NULL, status);
1465             if (!readToken) {
1466                 warning(startline, "Encountered empty int vector");
1467             }
1468             ustr_deinit(&memberComments);
1469             return result;
1470         }
1471 
1472         string = getInvariantString(state, NULL, NULL, status);
1473 
1474         if (U_FAILURE(*status))
1475         {
1476             res_close(result);
1477             return NULL;
1478         }
1479 
1480         /* For handling illegal char in the Intvector */
1481         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1482         len=(uint32_t)(stopstring-string);
1483 
1484         if(len==uprv_strlen(string))
1485         {
1486             intvector_add(result, value, status);
1487             uprv_free(string);
1488             token = peekToken(state, 0, NULL, NULL, NULL, status);
1489         }
1490         else
1491         {
1492             uprv_free(string);
1493             *status=U_INVALID_CHAR_FOUND;
1494         }
1495 
1496         if (U_FAILURE(*status))
1497         {
1498             res_close(result);
1499             return NULL;
1500         }
1501 
1502         /* the comma is optional (even though it is required to prevent the reader from concatenating
1503         consecutive entries) so that a missing comma on the last entry isn't an error */
1504         if (token == TOK_COMMA)
1505         {
1506             getToken(state, NULL, NULL, NULL, status);
1507         }
1508         readToken = TRUE;
1509     }
1510 
1511     /* not reached */
1512     /* A compiler warning will appear if all paths don't contain a return statement. */
1513 /*    intvector_close(result, status);
1514     *status = U_INTERNAL_PROGRAM_ERROR;
1515     return NULL;*/
1516 }
1517 
1518 static struct SResource *
parseBinary(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1519 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1520 {
1521     struct SResource *result = NULL;
1522     uint8_t          *value;
1523     char             *string;
1524     char              toConv[3] = {'\0', '\0', '\0'};
1525     uint32_t          count;
1526     uint32_t          i;
1527     uint32_t          line;
1528     char             *stopstring;
1529     uint32_t          len;
1530 
1531     string = getInvariantString(state, &line, NULL, status);
1532 
1533     if (string == NULL || U_FAILURE(*status))
1534     {
1535         return NULL;
1536     }
1537 
1538     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1539 
1540     if (U_FAILURE(*status))
1541     {
1542         uprv_free(string);
1543         return NULL;
1544     }
1545 
1546     if(isVerbose()){
1547         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1548     }
1549 
1550     count = (uint32_t)uprv_strlen(string);
1551     if (count > 0){
1552         if((count % 2)==0){
1553             value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1554 
1555             if (value == NULL)
1556             {
1557                 uprv_free(string);
1558                 *status = U_MEMORY_ALLOCATION_ERROR;
1559                 return NULL;
1560             }
1561 
1562             for (i = 0; i < count; i += 2)
1563             {
1564                 toConv[0] = string[i];
1565                 toConv[1] = string[i + 1];
1566 
1567                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1568                 len=(uint32_t)(stopstring-toConv);
1569 
1570                 if(len!=uprv_strlen(toConv))
1571                 {
1572                     uprv_free(string);
1573                     *status=U_INVALID_CHAR_FOUND;
1574                     return NULL;
1575                 }
1576             }
1577 
1578             result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1579 
1580             uprv_free(value);
1581         }
1582         else
1583         {
1584             *status = U_INVALID_CHAR_FOUND;
1585             uprv_free(string);
1586             error(line, "Encountered invalid binary string");
1587             return NULL;
1588         }
1589     }
1590     else
1591     {
1592         result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1593         warning(startline, "Encountered empty binary tag");
1594     }
1595     uprv_free(string);
1596 
1597     return result;
1598 }
1599 
1600 static struct SResource *
parseInteger(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1601 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1602 {
1603     struct SResource *result = NULL;
1604     int32_t           value;
1605     char             *string;
1606     char             *stopstring;
1607     uint32_t          len;
1608 
1609     string = getInvariantString(state, NULL, NULL, status);
1610 
1611     if (string == NULL || U_FAILURE(*status))
1612     {
1613         return NULL;
1614     }
1615 
1616     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1617 
1618     if (U_FAILURE(*status))
1619     {
1620         uprv_free(string);
1621         return NULL;
1622     }
1623 
1624     if(isVerbose()){
1625         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1626     }
1627 
1628     if (uprv_strlen(string) <= 0)
1629     {
1630         warning(startline, "Encountered empty integer. Default value is 0.");
1631     }
1632 
1633     /* Allow integer support for hexdecimal, octal digit and decimal*/
1634     /* and handle illegal char in the integer*/
1635     value = uprv_strtoul(string, &stopstring, 0);
1636     len=(uint32_t)(stopstring-string);
1637     if(len==uprv_strlen(string))
1638     {
1639         result = int_open(state->bundle, tag, value, comment, status);
1640     }
1641     else
1642     {
1643         *status=U_INVALID_CHAR_FOUND;
1644     }
1645     uprv_free(string);
1646 
1647     return result;
1648 }
1649 
1650 static struct SResource *
parseImport(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1651 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1652 {
1653     struct SResource *result;
1654     FileStream       *file;
1655     int32_t           len;
1656     uint8_t          *data;
1657     char             *filename;
1658     uint32_t          line;
1659     char     *fullname = NULL;
1660     filename = getInvariantString(state, &line, NULL, status);
1661 
1662     if (U_FAILURE(*status))
1663     {
1664         return NULL;
1665     }
1666 
1667     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1668 
1669     if (U_FAILURE(*status))
1670     {
1671         uprv_free(filename);
1672         return NULL;
1673     }
1674 
1675     if(isVerbose()){
1676         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1677     }
1678 
1679     /* Open the input file for reading */
1680     if (state->inputdir == NULL)
1681     {
1682 #if 1
1683         /*
1684          * Always save file file name, even if there's
1685          * no input directory specified. MIGHT BREAK SOMETHING
1686          */
1687         int32_t filenameLength = uprv_strlen(filename);
1688 
1689         fullname = (char *) uprv_malloc(filenameLength + 1);
1690         uprv_strcpy(fullname, filename);
1691 #endif
1692 
1693         file = T_FileStream_open(filename, "rb");
1694     }
1695     else
1696     {
1697 
1698         int32_t  count     = (int32_t)uprv_strlen(filename);
1699 
1700         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1701         {
1702             fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1703 
1704             /* test for NULL */
1705             if(fullname == NULL)
1706             {
1707                 *status = U_MEMORY_ALLOCATION_ERROR;
1708                 return NULL;
1709             }
1710 
1711             uprv_strcpy(fullname, state->inputdir);
1712 
1713             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1714             fullname[state->inputdirLength + 1] = '\0';
1715 
1716             uprv_strcat(fullname, filename);
1717         }
1718         else
1719         {
1720             fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1721 
1722             /* test for NULL */
1723             if(fullname == NULL)
1724             {
1725                 *status = U_MEMORY_ALLOCATION_ERROR;
1726                 return NULL;
1727             }
1728 
1729             uprv_strcpy(fullname, state->inputdir);
1730             uprv_strcat(fullname, filename);
1731         }
1732 
1733         file = T_FileStream_open(fullname, "rb");
1734 
1735     }
1736 
1737     if (file == NULL)
1738     {
1739         error(line, "couldn't open input file %s", filename);
1740         *status = U_FILE_ACCESS_ERROR;
1741         return NULL;
1742     }
1743 
1744     len  = T_FileStream_size(file);
1745     data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1746     /* test for NULL */
1747     if(data == NULL)
1748     {
1749         *status = U_MEMORY_ALLOCATION_ERROR;
1750         T_FileStream_close (file);
1751         return NULL;
1752     }
1753 
1754     /* int32_t numRead = */ T_FileStream_read  (file, data, len);
1755     T_FileStream_close (file);
1756 
1757     result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1758 
1759     uprv_free(data);
1760     uprv_free(filename);
1761     uprv_free(fullname);
1762 
1763     return result;
1764 }
1765 
1766 static struct SResource *
parseInclude(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1767 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1768 {
1769     struct SResource *result;
1770     int32_t           len=0;
1771     char             *filename;
1772     uint32_t          line;
1773     UChar *pTarget     = NULL;
1774 
1775     UCHARBUF *ucbuf;
1776     char     *fullname = NULL;
1777     int32_t  count     = 0;
1778     const char* cp = NULL;
1779     const UChar* uBuffer = NULL;
1780 
1781     filename = getInvariantString(state, &line, NULL, status);
1782     count     = (int32_t)uprv_strlen(filename);
1783 
1784     if (U_FAILURE(*status))
1785     {
1786         return NULL;
1787     }
1788 
1789     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1790 
1791     if (U_FAILURE(*status))
1792     {
1793         uprv_free(filename);
1794         return NULL;
1795     }
1796 
1797     if(isVerbose()){
1798         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1799     }
1800 
1801     fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1802     /* test for NULL */
1803     if(fullname == NULL)
1804     {
1805         *status = U_MEMORY_ALLOCATION_ERROR;
1806         uprv_free(filename);
1807         return NULL;
1808     }
1809 
1810     if(state->inputdir!=NULL){
1811         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1812         {
1813 
1814             uprv_strcpy(fullname, state->inputdir);
1815 
1816             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1817             fullname[state->inputdirLength + 1] = '\0';
1818 
1819             uprv_strcat(fullname, filename);
1820         }
1821         else
1822         {
1823             uprv_strcpy(fullname, state->inputdir);
1824             uprv_strcat(fullname, filename);
1825         }
1826     }else{
1827         uprv_strcpy(fullname,filename);
1828     }
1829 
1830     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1831 
1832     if (U_FAILURE(*status)) {
1833         error(line, "couldn't open input file %s\n", filename);
1834         return NULL;
1835     }
1836 
1837     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1838     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1839 
1840     ucbuf_close(ucbuf);
1841 
1842     uprv_free(pTarget);
1843 
1844     uprv_free(filename);
1845     uprv_free(fullname);
1846 
1847     return result;
1848 }
1849 
1850 
1851 
1852 
1853 
1854 U_STRING_DECL(k_type_string,    "string",    6);
1855 U_STRING_DECL(k_type_binary,    "binary",    6);
1856 U_STRING_DECL(k_type_bin,       "bin",       3);
1857 U_STRING_DECL(k_type_table,     "table",     5);
1858 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
1859 U_STRING_DECL(k_type_int,       "int",       3);
1860 U_STRING_DECL(k_type_integer,   "integer",   7);
1861 U_STRING_DECL(k_type_array,     "array",     5);
1862 U_STRING_DECL(k_type_alias,     "alias",     5);
1863 U_STRING_DECL(k_type_intvector, "intvector", 9);
1864 U_STRING_DECL(k_type_import,    "import",    6);
1865 U_STRING_DECL(k_type_include,   "include",   7);
1866 
1867 /* Various non-standard processing plugins that create one or more special resources. */
1868 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1869 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
1870 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
1871 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1872 
/*
 * Resource types the parser can be asked to produce.
 * The enumerators are used directly as indexes into gResourceTypes[], so the
 * two lists must stay in the same order.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN = 0,            /* no explicit type given; parseResource() guesses */
    RESTYPE_STRING,                 /* "string" */
    RESTYPE_BINARY,                 /* "binary" (alias "bin") */
    RESTYPE_TABLE,                  /* "table" */
    RESTYPE_TABLE_NO_FALLBACK,      /* "table(nofallback)"; rejected below the top level */
    RESTYPE_INTEGER,                /* "integer" (alias "int") */
    RESTYPE_ARRAY,                  /* "array" */
    RESTYPE_ALIAS,                  /* "alias" */
    RESTYPE_INTVECTOR,              /* "intvector" */
    RESTYPE_IMPORT,                 /* "import" */
    RESTYPE_INCLUDE,                /* "include" */
    RESTYPE_PROCESS_UCA_RULES,      /* "process(uca_rules)" */
    RESTYPE_PROCESS_COLLATION,      /* "process(collation)" */
    RESTYPE_PROCESS_TRANSLITERATOR, /* "process(transliterator)" */
    RESTYPE_PROCESS_DEPENDENCY,     /* "process(dependency)" */
    RESTYPE_RESERVED                /* end marker; bounds the search in parseResourceType() */
} EResourceType;
1892 
1893 static struct {
1894     const char *nameChars;   /* only used for debugging */
1895     const UChar *nameUChars;
1896     ParseResourceFunction *parseFunction;
1897 } gResourceTypes[] = {
1898     {"Unknown", NULL, NULL},
1899     {"string", k_type_string, parseString},
1900     {"binary", k_type_binary, parseBinary},
1901     {"table", k_type_table, parseTable},
1902     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1903     {"integer", k_type_integer, parseInteger},
1904     {"array", k_type_array, parseArray},
1905     {"alias", k_type_alias, parseAlias},
1906     {"intvector", k_type_intvector, parseIntVector},
1907     {"import", k_type_import, parseImport},
1908     {"include", k_type_include, parseInclude},
1909     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1910     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1911     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1912     {"process(dependency)", k_type_plugin_dependency, parseDependency},
1913     {"reserved", NULL, NULL}
1914 };
1915 
initParser()1916 void initParser()
1917 {
1918     U_STRING_INIT(k_type_string,    "string",    6);
1919     U_STRING_INIT(k_type_binary,    "binary",    6);
1920     U_STRING_INIT(k_type_bin,       "bin",       3);
1921     U_STRING_INIT(k_type_table,     "table",     5);
1922     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
1923     U_STRING_INIT(k_type_int,       "int",       3);
1924     U_STRING_INIT(k_type_integer,   "integer",   7);
1925     U_STRING_INIT(k_type_array,     "array",     5);
1926     U_STRING_INIT(k_type_alias,     "alias",     5);
1927     U_STRING_INIT(k_type_intvector, "intvector", 9);
1928     U_STRING_INIT(k_type_import,    "import",    6);
1929     U_STRING_INIT(k_type_include,   "include",   7);
1930 
1931     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1932     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
1933     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
1934     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
1935 }
1936 
isTable(enum EResourceType type)1937 static inline UBool isTable(enum EResourceType type) {
1938     return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1939 }
1940 
1941 static enum EResourceType
parseResourceType(ParseState * state,UErrorCode * status)1942 parseResourceType(ParseState* state, UErrorCode *status)
1943 {
1944     struct UString        *tokenValue;
1945     struct UString        comment;
1946     enum   EResourceType  result = RESTYPE_UNKNOWN;
1947     uint32_t              line=0;
1948     ustr_init(&comment);
1949     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1950 
1951     if (U_FAILURE(*status))
1952     {
1953         return RESTYPE_UNKNOWN;
1954     }
1955 
1956     *status = U_ZERO_ERROR;
1957 
1958     /* Search for normal types */
1959     result=RESTYPE_UNKNOWN;
1960     while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1961         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1962             break;
1963         }
1964     }
1965     /* Now search for the aliases */
1966     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1967         result = RESTYPE_INTEGER;
1968     }
1969     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1970         result = RESTYPE_BINARY;
1971     }
1972     else if (result == RESTYPE_RESERVED) {
1973         char tokenBuffer[1024];
1974         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1975         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1976         *status = U_INVALID_FORMAT_ERROR;
1977         error(line, "unknown resource type '%s'", tokenBuffer);
1978     }
1979 
1980     return result;
1981 }
1982 
1983 /* parse a non-top-level resource */
1984 static struct SResource *
parseResource(ParseState * state,char * tag,const struct UString * comment,UErrorCode * status)1985 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1986 {
1987     enum   ETokenType      token;
1988     enum   EResourceType  resType = RESTYPE_UNKNOWN;
1989     ParseResourceFunction *parseFunction = NULL;
1990     struct UString        *tokenValue;
1991     uint32_t                 startline;
1992     uint32_t                 line;
1993 
1994 
1995     token = getToken(state, &tokenValue, NULL, &startline, status);
1996 
1997     if(isVerbose()){
1998         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1999     }
2000 
2001     /* name . [ ':' type ] '{' resource '}' */
2002     /* This function parses from the colon onwards.  If the colon is present, parse the
2003     type then try to parse a resource of that type.  If there is no explicit type,
2004     work it out using the lookahead tokens. */
2005     switch (token)
2006     {
2007     case TOK_EOF:
2008         *status = U_INVALID_FORMAT_ERROR;
2009         error(startline, "Unexpected EOF encountered");
2010         return NULL;
2011 
2012     case TOK_ERROR:
2013         *status = U_INVALID_FORMAT_ERROR;
2014         return NULL;
2015 
2016     case TOK_COLON:
2017         resType = parseResourceType(state, status);
2018         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
2019 
2020         if (U_FAILURE(*status))
2021         {
2022             return NULL;
2023         }
2024 
2025         break;
2026 
2027     case TOK_OPEN_BRACE:
2028         break;
2029 
2030     default:
2031         *status = U_INVALID_FORMAT_ERROR;
2032         error(startline, "syntax error while reading a resource, expected '{' or ':'");
2033         return NULL;
2034     }
2035 
2036 
2037     if (resType == RESTYPE_UNKNOWN)
2038     {
2039         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
2040         We could have any of the following:
2041         { {         => array (nested)
2042         { :/}       => array
2043         { string ,  => string array
2044 
2045         { string {  => table
2046 
2047         { string :/{    => table
2048         { string }      => string
2049         */
2050 
2051         token = peekToken(state, 0, NULL, &line, NULL,status);
2052 
2053         if (U_FAILURE(*status))
2054         {
2055             return NULL;
2056         }
2057 
2058         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
2059         {
2060             resType = RESTYPE_ARRAY;
2061         }
2062         else if (token == TOK_STRING)
2063         {
2064             token = peekToken(state, 1, NULL, &line, NULL, status);
2065 
2066             if (U_FAILURE(*status))
2067             {
2068                 return NULL;
2069             }
2070 
2071             switch (token)
2072             {
2073             case TOK_COMMA:         resType = RESTYPE_ARRAY;  break;
2074             case TOK_OPEN_BRACE:    resType = RESTYPE_TABLE;  break;
2075             case TOK_CLOSE_BRACE:   resType = RESTYPE_STRING; break;
2076             case TOK_COLON:         resType = RESTYPE_TABLE;  break;
2077             default:
2078                 *status = U_INVALID_FORMAT_ERROR;
2079                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2080                 return NULL;
2081             }
2082         }
2083         else
2084         {
2085             *status = U_INVALID_FORMAT_ERROR;
2086             error(line, "Unexpected token after '{'");
2087             return NULL;
2088         }
2089 
2090         /* printf("Type guessed as %s\n", resourceNames[resType]); */
2091     } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
2092         *status = U_INVALID_FORMAT_ERROR;
2093         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2094         return NULL;
2095     }
2096 
2097 
2098     /* We should now know what we need to parse next, so call the appropriate parser
2099     function and return. */
2100     parseFunction = gResourceTypes[resType].parseFunction;
2101     if (parseFunction != NULL) {
2102         return parseFunction(state, tag, startline, comment, status);
2103     }
2104     else {
2105         *status = U_INTERNAL_PROGRAM_ERROR;
2106         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2107     }
2108 
2109     return NULL;
2110 }
2111 
2112 /* parse the top-level resource */
2113 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,const char * filename,UBool makeBinaryCollation,UBool omitCollationRules,UErrorCode * status)2114 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
2115       UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2116 {
2117     struct UString    *tokenValue;
2118     struct UString    comment;
2119     uint32_t           line;
2120     enum EResourceType bundleType;
2121     enum ETokenType    token;
2122     ParseState state;
2123     uint32_t i;
2124 
2125 
2126     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2127     {
2128         ustr_init(&state.lookahead[i].value);
2129         ustr_init(&state.lookahead[i].comment);
2130     }
2131 
2132     initLookahead(&state, buf, status);
2133 
2134     state.inputdir       = inputDir;
2135     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2136     state.outputdir       = outputDir;
2137     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2138     state.filename = filename;
2139     state.makeBinaryCollation = makeBinaryCollation;
2140     state.omitCollationRules = omitCollationRules;
2141 
2142     ustr_init(&comment);
2143     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2144 
2145     state.bundle = bundle_open(&comment, FALSE, status);
2146 
2147     if (state.bundle == NULL || U_FAILURE(*status))
2148     {
2149         return NULL;
2150     }
2151 
2152 
2153     bundle_setlocale(state.bundle, tokenValue->fChars, status);
2154 
2155     /* The following code is to make Empty bundle work no matter with :table specifer or not */
2156     token = getToken(&state, NULL, NULL, &line, status);
2157     if(token==TOK_COLON) {
2158         *status=U_ZERO_ERROR;
2159         bundleType=parseResourceType(&state, status);
2160 
2161         if(isTable(bundleType))
2162         {
2163             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2164         }
2165         else
2166         {
2167             *status=U_PARSE_ERROR;
2168              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2169         }
2170     }
2171     else
2172     {
2173         /* not a colon */
2174         if(token==TOK_OPEN_BRACE)
2175         {
2176             *status=U_ZERO_ERROR;
2177             bundleType=RESTYPE_TABLE;
2178         }
2179         else
2180         {
2181             /* neither colon nor open brace */
2182             *status=U_PARSE_ERROR;
2183             bundleType=RESTYPE_UNKNOWN;
2184             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2185         }
2186     }
2187 
2188     if (U_FAILURE(*status))
2189     {
2190         bundle_close(state.bundle, status);
2191         return NULL;
2192     }
2193 
2194     if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2195         /*
2196          * Parse a top-level table with the table(nofallback) declaration.
2197          * This is the same as a regular table, but also sets the
2198          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2199          */
2200         state.bundle->noFallback=TRUE;
2201     }
2202     /* top-level tables need not handle special table names like "collations" */
2203     realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2204     if(dependencyArray!=NULL){
2205         table_add(state.bundle->fRoot, dependencyArray, 0, status);
2206         dependencyArray = NULL;
2207     }
2208    if (U_FAILURE(*status))
2209     {
2210         bundle_close(state.bundle, status);
2211         res_close(dependencyArray);
2212         return NULL;
2213     }
2214 
2215     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2216     {
2217         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2218         if(isStrict()){
2219             *status = U_INVALID_FORMAT_ERROR;
2220             return NULL;
2221         }
2222     }
2223 
2224     cleanupLookahead(&state);
2225     ustr_deinit(&comment);
2226     return state.bundle;
2227 }
2228