• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1998-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.cpp
10 *
11 * Modification History:
12 *
13 *   Date          Name          Description
14 *   05/26/99     stephen       Creation.
15 *   02/25/00     weiv          Overhaul to write udata
16 *   5/10/01      Ram           removed ustdio dependency
17 *   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20 
21 // Safer use of UnicodeString.
22 #ifndef UNISTR_FROM_CHAR_EXPLICIT
23 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
24 #endif
25 
26 // Less important, but still a good idea.
27 #ifndef UNISTR_FROM_STRING_EXPLICIT
28 #   define UNISTR_FROM_STRING_EXPLICIT explicit
29 #endif
30 
31 #include "parse.h"
32 #include "errmsg.h"
33 #include "uhash.h"
34 #include "cmemory.h"
35 #include "cstring.h"
36 #include "uinvchar.h"
37 #include "read.h"
38 #include "ustr.h"
39 #include "reslist.h"
40 #include "rbt_pars.h"
41 #include "genrb.h"
42 #include "unicode/ustring.h"
43 #include "unicode/uscript.h"
44 #include "unicode/utf16.h"
45 #include "unicode/putil.h"
46 #include "collationbuilder.h"
47 #include "collationdata.h"
48 #include "collationdatareader.h"
49 #include "collationdatawriter.h"
50 #include "collationfastlatinbuilder.h"
51 #include "collationinfo.h"
52 #include "collationroot.h"
53 #include "collationruleparser.h"
54 #include "collationtailoring.h"
55 #include <stdio.h>
56 
57 /* Number of tokens to read ahead of the current stream position */
58 #define MAX_LOOKAHEAD   3
59 
60 #define CR               0x000D
61 #define LF               0x000A
62 #define SPACE            0x0020
63 #define TAB              0x0009
64 #define ESCAPE           0x005C
65 #define HASH             0x0023
66 #define QUOTE            0x0027
67 #define ZERO             0x0030
68 #define STARTCOMMAND     0x005B
69 #define ENDCOMMAND       0x005D
70 #define OPENSQBRACKET    0x005B
71 #define CLOSESQBRACKET   0x005D
72 
73 using icu::LocalPointer;
74 using icu::UnicodeString;
75 
76 struct Lookahead
77 {
78      enum   ETokenType type;
79      struct UString    value;
80      struct UString    comment;
81      uint32_t          line;
82 };
83 
84 /* keep in sync with token defines in read.h */
85 const char *tokenNames[TOK_TOKEN_COUNT] =
86 {
87      "string",             /* A string token, such as "MonthNames" */
88      "'{'",                 /* An opening brace character */
89      "'}'",                 /* A closing brace character */
90      "','",                 /* A comma */
91      "':'",                 /* A colon */
92 
93      "<end of file>",     /* End of the file has been reached successfully */
94      "<end of line>"
95 };
96 
97 /* Just to store "TRUE" */
98 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
99 
100 typedef struct {
101     struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
102     uint32_t          lookaheadPosition;
103     UCHARBUF         *buffer;
104     struct SRBRoot *bundle;
105     const char     *inputdir;
106     uint32_t        inputdirLength;
107     const char     *outputdir;
108     uint32_t        outputdirLength;
109     const char     *filename;
110     UBool           makeBinaryCollation;
111     UBool           omitCollationRules;
112 } ParseState;
113 
114 typedef struct SResource *
115 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
116 
117 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
118 
119 /* The nature of the lookahead buffer:
120    There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
121    MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
122    When getToken is called, the current pointer is moved to the next slot and the
123    old slot is filled with the next token from the reader by calling getNextToken.
124    The token values are stored in the slot, which means that token values don't
125    survive a call to getToken, ie.
126 
127    UString *value;
128 
129    getToken(&value, NULL, status);
130    getToken(NULL,   NULL, status);       bad - value is now a different string
131 */
132 static void
initLookahead(ParseState * state,UCHARBUF * buf,UErrorCode * status)133 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
134 {
135     static uint32_t initTypeStrings = 0;
136     uint32_t i;
137 
138     if (!initTypeStrings)
139     {
140         initTypeStrings = 1;
141     }
142 
143     state->lookaheadPosition   = 0;
144     state->buffer              = buf;
145 
146     resetLineNumber();
147 
148     for (i = 0; i < MAX_LOOKAHEAD; i++)
149     {
150         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
151         if (U_FAILURE(*status))
152         {
153             return;
154         }
155     }
156 
157     *status = U_ZERO_ERROR;
158 }
159 
160 static void
cleanupLookahead(ParseState * state)161 cleanupLookahead(ParseState* state)
162 {
163     uint32_t i;
164     for (i = 0; i <= MAX_LOOKAHEAD; i++)
165     {
166         ustr_deinit(&state->lookahead[i].value);
167         ustr_deinit(&state->lookahead[i].comment);
168     }
169 
170 }
171 
172 static enum ETokenType
getToken(ParseState * state,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)173 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
174 {
175     enum ETokenType result;
176     uint32_t          i;
177 
178     result = state->lookahead[state->lookaheadPosition].type;
179 
180     if (tokenValue != NULL)
181     {
182         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
183     }
184 
185     if (linenumber != NULL)
186     {
187         *linenumber = state->lookahead[state->lookaheadPosition].line;
188     }
189 
190     if (comment != NULL)
191     {
192         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
193     }
194 
195     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
196     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
197     ustr_setlen(&state->lookahead[i].comment, 0, status);
198     ustr_setlen(&state->lookahead[i].value, 0, status);
199     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
200 
201     /* printf("getToken, returning %s\n", tokenNames[result]); */
202 
203     return result;
204 }
205 
206 static enum ETokenType
peekToken(ParseState * state,uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)207 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
208 {
209     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
210 
211     if (U_FAILURE(*status))
212     {
213         return TOK_ERROR;
214     }
215 
216     if (lookaheadCount >= MAX_LOOKAHEAD)
217     {
218         *status = U_INTERNAL_PROGRAM_ERROR;
219         return TOK_ERROR;
220     }
221 
222     if (tokenValue != NULL)
223     {
224         *tokenValue = &state->lookahead[i].value;
225     }
226 
227     if (linenumber != NULL)
228     {
229         *linenumber = state->lookahead[i].line;
230     }
231 
232     if(comment != NULL){
233         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
234     }
235 
236     return state->lookahead[i].type;
237 }
238 
239 static void
expect(ParseState * state,enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)240 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
241 {
242     uint32_t        line;
243 
244     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
245 
246     if (linenumber != NULL)
247     {
248         *linenumber = line;
249     }
250 
251     if (U_FAILURE(*status))
252     {
253         return;
254     }
255 
256     if (token != expectedToken)
257     {
258         *status = U_INVALID_FORMAT_ERROR;
259         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
260     }
261     else
262     {
263         *status = U_ZERO_ERROR;
264     }
265 }
266 
getInvariantString(ParseState * state,uint32_t * line,struct UString * comment,UErrorCode * status)267 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
268 {
269     struct UString *tokenValue;
270     char           *result;
271     uint32_t        count;
272 
273     expect(state, TOK_STRING, &tokenValue, comment, line, status);
274 
275     if (U_FAILURE(*status))
276     {
277         return NULL;
278     }
279 
280     count = u_strlen(tokenValue->fChars);
281     if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
282         *status = U_INVALID_FORMAT_ERROR;
283         error(*line, "invariant characters required for table keys, binary data, etc.");
284         return NULL;
285     }
286 
287     result = static_cast<char *>(uprv_malloc(count+1));
288 
289     if (result == NULL)
290     {
291         *status = U_MEMORY_ALLOCATION_ERROR;
292         return NULL;
293     }
294 
295     u_UCharsToChars(tokenValue->fChars, result, count+1);
296     return result;
297 }
298 
299 static struct SResource *
parseUCARules(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)300 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
301 {
302     struct SResource *result = NULL;
303     struct UString   *tokenValue;
304     FileStream       *file          = NULL;
305     char              filename[256] = { '\0' };
306     char              cs[128]       = { '\0' };
307     uint32_t          line;
308     UBool quoted = FALSE;
309     UCHARBUF *ucbuf=NULL;
310     UChar32   c     = 0;
311     const char* cp  = NULL;
312     UChar *pTarget     = NULL;
313     UChar *target      = NULL;
314     UChar *targetLimit = NULL;
315     int32_t size = 0;
316 
317     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
318 
319     if(isVerbose()){
320         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
321     }
322 
323     if (U_FAILURE(*status))
324     {
325         return NULL;
326     }
327     /* make the filename including the directory */
328     if (state->inputdir != NULL)
329     {
330         uprv_strcat(filename, state->inputdir);
331 
332         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
333         {
334             uprv_strcat(filename, U_FILE_SEP_STRING);
335         }
336     }
337 
338     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
339 
340     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
341 
342     if (U_FAILURE(*status))
343     {
344         return NULL;
345     }
346     uprv_strcat(filename, cs);
347 
348     if(state->omitCollationRules) {
349         return res_none();
350     }
351 
352     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
353 
354     if (U_FAILURE(*status)) {
355         error(line, "An error occured while opening the input file %s\n", filename);
356         return NULL;
357     }
358 
359     /* We allocate more space than actually required
360     * since the actual size needed for storing UChars
361     * is not known in UTF-8 byte stream
362     */
363     size        = ucbuf_size(ucbuf) + 1;
364     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
365     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
366     target      = pTarget;
367     targetLimit = pTarget+size;
368 
369     /* read the rules into the buffer */
370     while (target < targetLimit)
371     {
372         c = ucbuf_getc(ucbuf, status);
373         if(c == QUOTE) {
374             quoted = (UBool)!quoted;
375         }
376         /* weiv (06/26/2002): adding the following:
377          * - preserving spaces in commands [...]
378          * - # comments until the end of line
379          */
380         if (c == STARTCOMMAND && !quoted)
381         {
382             /* preserve commands
383              * closing bracket will be handled by the
384              * append at the end of the loop
385              */
386             while(c != ENDCOMMAND) {
387                 U_APPEND_CHAR32_ONLY(c, target);
388                 c = ucbuf_getc(ucbuf, status);
389             }
390         }
391         else if (c == HASH && !quoted) {
392             /* skip comments */
393             while(c != CR && c != LF) {
394                 c = ucbuf_getc(ucbuf, status);
395             }
396             continue;
397         }
398         else if (c == ESCAPE)
399         {
400             c = unescape(ucbuf, status);
401 
402             if (c == (UChar32)U_ERR)
403             {
404                 uprv_free(pTarget);
405                 T_FileStream_close(file);
406                 return NULL;
407             }
408         }
409         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
410         {
411             /* ignore spaces carriage returns
412             * and line feed unless in the form \uXXXX
413             */
414             continue;
415         }
416 
417         /* Append UChar * after dissembling if c > 0xffff*/
418         if (c != (UChar32)U_EOF)
419         {
420             U_APPEND_CHAR32_ONLY(c, target);
421         }
422         else
423         {
424             break;
425         }
426     }
427 
428     /* terminate the string */
429     if(target < targetLimit){
430         *target = 0x0000;
431     }
432 
433     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
434 
435 
436     ucbuf_close(ucbuf);
437     uprv_free(pTarget);
438     T_FileStream_close(file);
439 
440     return result;
441 }
442 
443 static struct SResource *
parseTransliterator(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)444 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
445 {
446     struct SResource *result = NULL;
447     struct UString   *tokenValue;
448     FileStream       *file          = NULL;
449     char              filename[256] = { '\0' };
450     char              cs[128]       = { '\0' };
451     uint32_t          line;
452     UCHARBUF *ucbuf=NULL;
453     const char* cp  = NULL;
454     UChar *pTarget     = NULL;
455     const UChar *pSource     = NULL;
456     int32_t size = 0;
457 
458     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
459 
460     if(isVerbose()){
461         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
462     }
463 
464     if (U_FAILURE(*status))
465     {
466         return NULL;
467     }
468     /* make the filename including the directory */
469     if (state->inputdir != NULL)
470     {
471         uprv_strcat(filename, state->inputdir);
472 
473         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
474         {
475             uprv_strcat(filename, U_FILE_SEP_STRING);
476         }
477     }
478 
479     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
480 
481     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
482 
483     if (U_FAILURE(*status))
484     {
485         return NULL;
486     }
487     uprv_strcat(filename, cs);
488 
489 
490     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
491 
492     if (U_FAILURE(*status)) {
493         error(line, "An error occured while opening the input file %s\n", filename);
494         return NULL;
495     }
496 
497     /* We allocate more space than actually required
498     * since the actual size needed for storing UChars
499     * is not known in UTF-8 byte stream
500     */
501     pSource = ucbuf_getBuffer(ucbuf, &size, status);
502     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
503     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
504 
505 #if !UCONFIG_NO_TRANSLITERATION
506     size = utrans_stripRules(pSource, size, pTarget, status);
507 #else
508     size = 0;
509     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
510 #endif
511     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
512 
513     ucbuf_close(ucbuf);
514     uprv_free(pTarget);
515     T_FileStream_close(file);
516 
517     return result;
518 }
519 static struct SResource* dependencyArray = NULL;
520 
521 static struct SResource *
parseDependency(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)522 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
523 {
524     struct SResource *result = NULL;
525     struct SResource *elem = NULL;
526     struct UString   *tokenValue;
527     uint32_t          line;
528     char              filename[256] = { '\0' };
529     char              cs[128]       = { '\0' };
530 
531     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
532 
533     if(isVerbose()){
534         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
535     }
536 
537     if (U_FAILURE(*status))
538     {
539         return NULL;
540     }
541     /* make the filename including the directory */
542     if (state->outputdir != NULL)
543     {
544         uprv_strcat(filename, state->outputdir);
545 
546         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
547         {
548             uprv_strcat(filename, U_FILE_SEP_STRING);
549         }
550     }
551 
552     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
553 
554     if (U_FAILURE(*status))
555     {
556         return NULL;
557     }
558     uprv_strcat(filename, cs);
559     if(!T_FileStream_file_exists(filename)){
560         if(isStrict()){
561             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
562         }else{
563             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
564         }
565     }
566     if(dependencyArray==NULL){
567         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
568     }
569     if(tag!=NULL){
570         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
571     }
572     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
573 
574     array_add(dependencyArray, elem, status);
575 
576     if (U_FAILURE(*status))
577     {
578         return NULL;
579     }
580     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
581     return result;
582 }
583 static struct SResource *
parseString(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)584 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
585 {
586     struct UString   *tokenValue;
587     struct SResource *result = NULL;
588 
589 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
590     {
591         return parseUCARules(tag, startline, status);
592     }*/
593     if(isVerbose()){
594         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
595     }
596     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
597 
598     if (U_SUCCESS(*status))
599     {
600         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
601         doesn't survive expect either) */
602 
603         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
604         if(U_SUCCESS(*status) && result) {
605             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
606 
607             if (U_FAILURE(*status))
608             {
609                 res_close(result);
610                 return NULL;
611             }
612         }
613     }
614 
615     return result;
616 }
617 
618 static struct SResource *
parseAlias(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)619 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
620 {
621     struct UString   *tokenValue;
622     struct SResource *result  = NULL;
623 
624     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
625 
626     if(isVerbose()){
627         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
628     }
629 
630     if (U_SUCCESS(*status))
631     {
632         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
633         doesn't survive expect either) */
634 
635         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
636 
637         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
638 
639         if (U_FAILURE(*status))
640         {
641             res_close(result);
642             return NULL;
643         }
644     }
645 
646     return result;
647 }
648 
649 #if !UCONFIG_NO_COLLATION
650 
651 namespace {
652 
resLookup(struct SResource * res,const char * key)653 static struct SResource* resLookup(struct SResource* res, const char* key){
654     struct SResource *current = NULL;
655     struct SResTable *list;
656     if (res == res_none()) {
657         return NULL;
658     }
659 
660     list = &(res->u.fTable);
661 
662     current = list->fFirst;
663     while (current != NULL) {
664         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
665             return current;
666         }
667         current = current->fNext;
668     }
669     return NULL;
670 }
671 
672 class GenrbImporter : public icu::CollationRuleParser::Importer {
673 public:
GenrbImporter(const char * in,const char * out)674     GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
675     virtual ~GenrbImporter();
676     virtual const UnicodeString *getRules(
677             const char *localeID, const char *collationType,
678             const char *&errorReason, UErrorCode &errorCode);
679 
680 private:
681     const char *inputDir;
682     const char *outputDir;
683     UnicodeString rules;
684 };
685 
~GenrbImporter()686 GenrbImporter::~GenrbImporter() {}
687 
688 const UnicodeString *
getRules(const char * localeID,const char * collationType,const char * &,UErrorCode & errorCode)689 GenrbImporter::getRules(
690         const char *localeID, const char *collationType,
691         const char *& /*errorReason*/, UErrorCode &errorCode) {
692     struct SRBRoot *data         = NULL;
693     UCHARBUF       *ucbuf        = NULL;
694     int localeLength = strlen(localeID);
695     char* filename = (char*)uprv_malloc(localeLength+5);
696     char           *inputDirBuf  = NULL;
697     char           *openFileName = NULL;
698     const char* cp = "";
699     int32_t i = 0;
700     int32_t dirlen  = 0;
701     int32_t filelen = 0;
702     struct SResource* root;
703     struct SResource* collations;
704     struct SResource* collation;
705     struct SResource* sequence;
706 
707     memcpy(filename, localeID, localeLength);
708     for(i = 0; i < localeLength; i++){
709         if(filename[i] == '-'){
710             filename[i] = '_';
711         }
712     }
713     filename[localeLength]   = '.';
714     filename[localeLength+1] = 't';
715     filename[localeLength+2] = 'x';
716     filename[localeLength+3] = 't';
717     filename[localeLength+4] = 0;
718 
719 
720     if (U_FAILURE(errorCode)) {
721         return NULL;
722     }
723     if(filename==NULL){
724         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
725         return NULL;
726     }else{
727         filelen = (int32_t)uprv_strlen(filename);
728     }
729     if(inputDir == NULL) {
730         const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
731         openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
732         openFileName[0] = '\0';
733         if (filenameBegin != NULL) {
734             /*
735              * When a filename ../../../data/root.txt is specified,
736              * we presume that the input directory is ../../../data
737              * This is very important when the resource file includes
738              * another file, like UCARules.txt or thaidict.brk.
739              */
740             int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
741             inputDirBuf = (char *)uprv_malloc(filenameSize);
742 
743             /* test for NULL */
744             if(inputDirBuf == NULL) {
745                 errorCode = U_MEMORY_ALLOCATION_ERROR;
746                 goto finish;
747             }
748 
749             uprv_strncpy(inputDirBuf, filename, filenameSize);
750             inputDirBuf[filenameSize - 1] = 0;
751             inputDir = inputDirBuf;
752             dirlen  = (int32_t)uprv_strlen(inputDir);
753         }
754     }else{
755         dirlen  = (int32_t)uprv_strlen(inputDir);
756 
757         if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
758             openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
759 
760             /* test for NULL */
761             if(openFileName == NULL) {
762                 errorCode = U_MEMORY_ALLOCATION_ERROR;
763                 goto finish;
764             }
765 
766             openFileName[0] = '\0';
767             /*
768              * append the input dir to openFileName if the first char in
769              * filename is not file seperation char and the last char input directory is  not '.'.
770              * This is to support :
771              * genrb -s. /home/icu/data
772              * genrb -s. icu/data
773              * The user cannot mix notations like
774              * genrb -s. /icu/data --- the absolute path specified. -s redundant
775              * user should use
776              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
777              */
778             if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
779                 uprv_strcpy(openFileName, inputDir);
780                 openFileName[dirlen]     = U_FILE_SEP_CHAR;
781             }
782             openFileName[dirlen + 1] = '\0';
783         } else {
784             openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
785 
786             /* test for NULL */
787             if(openFileName == NULL) {
788                 errorCode = U_MEMORY_ALLOCATION_ERROR;
789                 goto finish;
790             }
791 
792             uprv_strcpy(openFileName, inputDir);
793 
794         }
795     }
796     uprv_strcat(openFileName, filename);
797     /* printf("%s\n", openFileName);  */
798     errorCode = U_ZERO_ERROR;
799     ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &errorCode);
800 
801     if(errorCode == U_FILE_ACCESS_ERROR) {
802 
803         fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
804         goto finish;
805     }
806     if (ucbuf == NULL || U_FAILURE(errorCode)) {
807         fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName == NULL ? filename : openFileName,u_errorName(errorCode));
808         goto finish;
809     }
810 
811     /* Parse the data into an SRBRoot */
812     data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode);
813 
814     root = data->fRoot;
815     collations = resLookup(root, "collations");
816     if (collations != NULL) {
817       collation = resLookup(collations, collationType);
818       if (collation != NULL) {
819         sequence = resLookup(collation, "Sequence");
820         if (sequence != NULL) {
821           rules.setTo(FALSE, sequence->u.fString.fChars, sequence->u.fString.fLength);
822         }
823       }
824     }
825 
826 finish:
827     if (inputDirBuf != NULL) {
828         uprv_free(inputDirBuf);
829     }
830 
831     if (openFileName != NULL) {
832         uprv_free(openFileName);
833     }
834 
835     if(ucbuf) {
836         ucbuf_close(ucbuf);
837     }
838 
839     return &rules;
840 }
841 
842 // Quick-and-dirty escaping function.
843 // Assumes that we are on an ASCII-based platform.
844 static void
escape(const UChar * s,char * buffer)845 escape(const UChar *s, char *buffer) {
846     int32_t length = u_strlen(s);
847     int32_t i = 0;
848     for (;;) {
849         UChar32 c;
850         U16_NEXT(s, i, length, c);
851         if (c == 0) {
852             *buffer = 0;
853             return;
854         } else if (0x20 <= c && c <= 0x7e) {
855             // printable ASCII
856             *buffer++ = (char)c;  // assumes ASCII-based platform
857         } else {
858             buffer += sprintf(buffer, "\\u%04X", (int)c);
859         }
860     }
861 }
862 
863 }  // namespace
864 
865 #endif  // !UCONFIG_NO_COLLATION
866 
867 static struct SResource *
addCollation(ParseState * state,struct SResource * result,const char * collationType,uint32_t startline,UErrorCode * status)868 addCollation(ParseState* state, struct SResource  *result, const char *collationType,
869              uint32_t startline, UErrorCode *status)
870 {
871     // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
872     struct SResource  *member = NULL;
873     struct UString    *tokenValue;
874     struct UString     comment;
875     enum   ETokenType  token;
876     char               subtag[1024];
877     UnicodeString      rules;
878     UBool              haveRules = FALSE;
879     UVersionInfo       version;
880     uint32_t           line;
881 
882     /* '{' . (name resource)* '}' */
883     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
884 
885     for (;;)
886     {
887         ustr_init(&comment);
888         token = getToken(state, &tokenValue, &comment, &line, status);
889 
890         if (token == TOK_CLOSE_BRACE)
891         {
892             break;
893         }
894 
895         if (token != TOK_STRING)
896         {
897             res_close(result);
898             *status = U_INVALID_FORMAT_ERROR;
899 
900             if (token == TOK_EOF)
901             {
902                 error(startline, "unterminated table");
903             }
904             else
905             {
906                 error(line, "Unexpected token %s", tokenNames[token]);
907             }
908 
909             return NULL;
910         }
911 
912         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
913 
914         if (U_FAILURE(*status))
915         {
916             res_close(result);
917             return NULL;
918         }
919 
920         member = parseResource(state, subtag, NULL, status);
921 
922         if (U_FAILURE(*status))
923         {
924             res_close(result);
925             return NULL;
926         }
927         if (result == NULL)
928         {
929             // Ignore the parsed resources, continue parsing.
930         }
931         else if (uprv_strcmp(subtag, "Version") == 0)
932         {
933             char     ver[40];
934             int32_t length = member->u.fString.fLength;
935 
936             if (length >= (int32_t) sizeof(ver))
937             {
938                 length = (int32_t) sizeof(ver) - 1;
939             }
940 
941             u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
942             u_versionFromString(version, ver);
943 
944             table_add(result, member, line, status);
945             member = NULL;
946         }
947         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
948         {
949             /* discard duplicate %%CollationBin if any*/
950         }
951         else if (uprv_strcmp(subtag, "Sequence") == 0)
952         {
953             rules.setTo(member->u.fString.fChars, member->u.fString.fLength);
954             haveRules = TRUE;
955             // Defer building the collator until we have seen
956             // all sub-elements of the collation table, including the Version.
957             /* in order to achieve smaller data files, we can direct genrb */
958             /* to omit collation rules */
959             if(!state->omitCollationRules) {
960                 table_add(result, member, line, status);
961                 member = NULL;
962             }
963         }
964         else  // Just copy non-special items.
965         {
966             table_add(result, member, line, status);
967             member = NULL;
968         }
969         res_close(member);  // TODO: use LocalPointer
970         if (U_FAILURE(*status))
971         {
972             res_close(result);
973             return NULL;
974         }
975     }
976 
977     if (!haveRules) { return result; }
978 
979 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
980     warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
981     (void)collationType;
982 #else
983     if(!state->makeBinaryCollation) {
984         if(isVerbose()) {
985             printf("Not building %s~%s collation binary\n", state->filename, collationType);
986         }
987         return result;
988     }
989     UErrorCode intStatus = U_ZERO_ERROR;
990     UParseError parseError;
991     uprv_memset(&parseError, 0, sizeof(parseError));
992     GenrbImporter importer(state->inputdir, state->outputdir);
993     const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
994     if(U_FAILURE(intStatus)) {
995         error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
996         res_close(result);
997         return NULL;  // TODO: use LocalUResourceBundlePointer for result
998     }
999     icu::CollationBuilder builder(base, intStatus);
1000     if(uprv_strncmp(collationType, "search", 6) == 0) {
1001         builder.disableFastLatin();  // build fast-Latin table unless search collator
1002     }
1003     LocalPointer<icu::CollationTailoring> t(
1004             builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
1005     if(U_FAILURE(intStatus)) {
1006         const char *reason = builder.getErrorReason();
1007         if(reason == NULL) { reason = ""; }
1008         error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s",
1009                 state->filename, collationType,
1010                 (long)parseError.offset, u_errorName(intStatus), reason);
1011         if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1012             // Print pre- and post-context.
1013             char preBuffer[100], postBuffer[100];
1014             escape(parseError.preContext, preBuffer);
1015             escape(parseError.postContext, postBuffer);
1016             error(line, "  error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
1017         }
1018         if(isStrict()) {
1019             *status = intStatus;
1020             res_close(result);
1021             return NULL;
1022         }
1023     }
1024     icu::LocalMemory<uint8_t> buffer;
1025     int32_t capacity = 100000;
1026     uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
1027     if(dest == NULL) {
1028         fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1029                 (long)capacity);
1030         *status = U_MEMORY_ALLOCATION_ERROR;
1031         res_close(result);
1032         return NULL;
1033     }
1034     int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
1035     int32_t totalSize = icu::CollationDataWriter::writeTailoring(
1036             *t, *t->settings, indexes, dest, capacity, intStatus);
1037     if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
1038         intStatus = U_ZERO_ERROR;
1039         capacity = totalSize;
1040         dest = buffer.allocateInsteadAndCopy(capacity);
1041         if(dest == NULL) {
1042             fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
1043                     (long)capacity);
1044             *status = U_MEMORY_ALLOCATION_ERROR;
1045             res_close(result);
1046             return NULL;
1047         }
1048         totalSize = icu::CollationDataWriter::writeTailoring(
1049                 *t, *t->settings, indexes, dest, capacity, intStatus);
1050     }
1051     if(U_FAILURE(intStatus)) {
1052         fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1053                 u_errorName(intStatus));
1054         res_close(result);
1055         return NULL;
1056     }
1057     if(isVerbose()) {
1058         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1059         icu::CollationInfo::printSizes(totalSize, indexes);
1060     }
1061     struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1062     table_add(result, collationBin, line, status);
1063     if (U_FAILURE(*status)) {
1064         res_close(result);
1065         return NULL;
1066     }
1067 #endif
1068     return result;
1069 }
1070 
1071 static UBool
keepCollationType(const char * type)1072 keepCollationType(const char *type) {
1073     // BEGIN android-added
1074     if (uprv_strcmp(type, "big5han") == 0 && !gIncludeBig5HanColl) { return FALSE; }
1075     if (uprv_strcmp(type, "gb2312han") == 0 && !gIncludeGb2312HanColl) { return FALSE; }
1076     if (uprv_strcmp(type, "zhuyin") == 0 && !gIncludeZhuyinHanColl) { return FALSE; }
1077     // END android-added
1078     return gIncludeUnihanColl || uprv_strcmp(type, "unihan") != 0;
1079 }
1080 
1081 static struct SResource *
parseCollationElements(ParseState * state,char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)1082 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1083 {
1084     struct SResource  *result = NULL;
1085     struct SResource  *member = NULL;
1086     struct SResource  *collationRes = NULL;
1087     struct UString    *tokenValue;
1088     struct UString     comment;
1089     enum   ETokenType  token;
1090     char               subtag[1024], typeKeyword[1024];
1091     uint32_t           line;
1092 
1093     result = table_open(state->bundle, tag, NULL, status);
1094 
1095     if (result == NULL || U_FAILURE(*status))
1096     {
1097         return NULL;
1098     }
1099     if(isVerbose()){
1100         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1101     }
1102     if(!newCollation) {
1103         return addCollation(state, result, "(no type)", startline, status);
1104     }
1105     else {
1106         for(;;) {
1107             ustr_init(&comment);
1108             token = getToken(state, &tokenValue, &comment, &line, status);
1109 
1110             if (token == TOK_CLOSE_BRACE)
1111             {
1112                 return result;
1113             }
1114 
1115             if (token != TOK_STRING)
1116             {
1117                 res_close(result);
1118                 *status = U_INVALID_FORMAT_ERROR;
1119 
1120                 if (token == TOK_EOF)
1121                 {
1122                     error(startline, "unterminated table");
1123                 }
1124                 else
1125                 {
1126                     error(line, "Unexpected token %s", tokenNames[token]);
1127                 }
1128 
1129                 return NULL;
1130             }
1131 
1132             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1133 
1134             if (U_FAILURE(*status))
1135             {
1136                 res_close(result);
1137                 return NULL;
1138             }
1139 
1140             if (uprv_strcmp(subtag, "default") == 0)
1141             {
1142                 member = parseResource(state, subtag, NULL, status);
1143 
1144                 if (U_FAILURE(*status))
1145                 {
1146                     res_close(result);
1147                     return NULL;
1148                 }
1149 
1150                 table_add(result, member, line, status);
1151             }
1152             else
1153             {
1154                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1155                 /* this probably needs to be refactored or recursively use the parser */
1156                 /* first we assume that our collation table won't have the explicit type */
1157                 /* then, we cannot handle aliases */
1158                 if(token == TOK_OPEN_BRACE) {
1159                     token = getToken(state, &tokenValue, &comment, &line, status);
1160                     if (keepCollationType(subtag)) {
1161                         collationRes = table_open(state->bundle, subtag, NULL, status);
1162                     } else {
1163                         collationRes = NULL;
1164                     }
1165                     // need to parse the collation data regardless
1166                     collationRes = addCollation(state, collationRes, subtag, startline, status);
1167                     if (collationRes != NULL) {
1168                         table_add(result, collationRes, startline, status);
1169                     }
1170                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1171                     /* we could have a table too */
1172                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1173                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1174                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
1175                         member = parseResource(state, subtag, NULL, status);
1176                         if (U_FAILURE(*status))
1177                         {
1178                             res_close(result);
1179                             return NULL;
1180                         }
1181 
1182                         table_add(result, member, line, status);
1183                     } else {
1184                         res_close(result);
1185                         *status = U_INVALID_FORMAT_ERROR;
1186                         return NULL;
1187                     }
1188                 } else {
1189                     res_close(result);
1190                     *status = U_INVALID_FORMAT_ERROR;
1191                     return NULL;
1192                 }
1193             }
1194 
1195             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1196 
1197             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1198 
1199             if (U_FAILURE(*status))
1200             {
1201                 res_close(result);
1202                 return NULL;
1203             }
1204         }
1205     }
1206 }
1207 
1208 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1209    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1210 static struct SResource *
realParseTable(ParseState * state,struct SResource * table,char * tag,uint32_t startline,UErrorCode * status)1211 realParseTable(ParseState* state, struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
1212 {
1213     struct SResource  *member = NULL;
1214     struct UString    *tokenValue=NULL;
1215     struct UString    comment;
1216     enum   ETokenType token;
1217     char              subtag[1024];
1218     uint32_t          line;
1219     UBool             readToken = FALSE;
1220 
1221     /* '{' . (name resource)* '}' */
1222 
1223     if(isVerbose()){
1224         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1225     }
1226     for (;;)
1227     {
1228         ustr_init(&comment);
1229         token = getToken(state, &tokenValue, &comment, &line, status);
1230 
1231         if (token == TOK_CLOSE_BRACE)
1232         {
1233             if (!readToken) {
1234                 warning(startline, "Encountered empty table");
1235             }
1236             return table;
1237         }
1238 
1239         if (token != TOK_STRING)
1240         {
1241             *status = U_INVALID_FORMAT_ERROR;
1242 
1243             if (token == TOK_EOF)
1244             {
1245                 error(startline, "unterminated table");
1246             }
1247             else
1248             {
1249                 error(line, "unexpected token %s", tokenNames[token]);
1250             }
1251 
1252             return NULL;
1253         }
1254 
1255         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1256             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1257         } else {
1258             *status = U_INVALID_FORMAT_ERROR;
1259             error(line, "invariant characters required for table keys");
1260             return NULL;
1261         }
1262 
1263         if (U_FAILURE(*status))
1264         {
1265             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1266             return NULL;
1267         }
1268 
1269         member = parseResource(state, subtag, &comment, status);
1270 
1271         if (member == NULL || U_FAILURE(*status))
1272         {
1273             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1274             return NULL;
1275         }
1276 
1277         table_add(table, member, line, status);
1278 
1279         if (U_FAILURE(*status))
1280         {
1281             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1282             return NULL;
1283         }
1284         readToken = TRUE;
1285         ustr_deinit(&comment);
1286    }
1287 
1288     /* not reached */
1289     /* A compiler warning will appear if all paths don't contain a return statement. */
1290 /*     *status = U_INTERNAL_PROGRAM_ERROR;
1291      return NULL;*/
1292 }
1293 
1294 static struct SResource *
parseTable(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1295 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1296 {
1297     struct SResource *result;
1298 
1299     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1300     {
1301         return parseCollationElements(state, tag, startline, FALSE, status);
1302     }
1303     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1304     {
1305         return parseCollationElements(state, tag, startline, TRUE, status);
1306     }
1307     if(isVerbose()){
1308         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1309     }
1310 
1311     result = table_open(state->bundle, tag, comment, status);
1312 
1313     if (result == NULL || U_FAILURE(*status))
1314     {
1315         return NULL;
1316     }
1317     return realParseTable(state, result, tag, startline,  status);
1318 }
1319 
1320 static struct SResource *
parseArray(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1321 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1322 {
1323     struct SResource  *result = NULL;
1324     struct SResource  *member = NULL;
1325     struct UString    *tokenValue;
1326     struct UString    memberComments;
1327     enum   ETokenType token;
1328     UBool             readToken = FALSE;
1329 
1330     result = array_open(state->bundle, tag, comment, status);
1331 
1332     if (result == NULL || U_FAILURE(*status))
1333     {
1334         return NULL;
1335     }
1336     if(isVerbose()){
1337         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1338     }
1339 
1340     ustr_init(&memberComments);
1341 
1342     /* '{' . resource [','] '}' */
1343     for (;;)
1344     {
1345         /* reset length */
1346         ustr_setlen(&memberComments, 0, status);
1347 
1348         /* check for end of array, but don't consume next token unless it really is the end */
1349         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1350 
1351 
1352         if (token == TOK_CLOSE_BRACE)
1353         {
1354             getToken(state, NULL, NULL, NULL, status);
1355             if (!readToken) {
1356                 warning(startline, "Encountered empty array");
1357             }
1358             break;
1359         }
1360 
1361         if (token == TOK_EOF)
1362         {
1363             res_close(result);
1364             *status = U_INVALID_FORMAT_ERROR;
1365             error(startline, "unterminated array");
1366             return NULL;
1367         }
1368 
1369         /* string arrays are a special case */
1370         if (token == TOK_STRING)
1371         {
1372             getToken(state, &tokenValue, &memberComments, NULL, status);
1373             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1374         }
1375         else
1376         {
1377             member = parseResource(state, NULL, &memberComments, status);
1378         }
1379 
1380         if (member == NULL || U_FAILURE(*status))
1381         {
1382             res_close(result);
1383             return NULL;
1384         }
1385 
1386         array_add(result, member, status);
1387 
1388         if (U_FAILURE(*status))
1389         {
1390             res_close(result);
1391             return NULL;
1392         }
1393 
1394         /* eat optional comma if present */
1395         token = peekToken(state, 0, NULL, NULL, NULL, status);
1396 
1397         if (token == TOK_COMMA)
1398         {
1399             getToken(state, NULL, NULL, NULL, status);
1400         }
1401 
1402         if (U_FAILURE(*status))
1403         {
1404             res_close(result);
1405             return NULL;
1406         }
1407         readToken = TRUE;
1408     }
1409 
1410     ustr_deinit(&memberComments);
1411     return result;
1412 }
1413 
1414 static struct SResource *
parseIntVector(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1415 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1416 {
1417     struct SResource  *result = NULL;
1418     enum   ETokenType  token;
1419     char              *string;
1420     int32_t            value;
1421     UBool              readToken = FALSE;
1422     char              *stopstring;
1423     uint32_t           len;
1424     struct UString     memberComments;
1425 
1426     result = intvector_open(state->bundle, tag, comment, status);
1427 
1428     if (result == NULL || U_FAILURE(*status))
1429     {
1430         return NULL;
1431     }
1432 
1433     if(isVerbose()){
1434         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1435     }
1436     ustr_init(&memberComments);
1437     /* '{' . string [','] '}' */
1438     for (;;)
1439     {
1440         ustr_setlen(&memberComments, 0, status);
1441 
1442         /* check for end of array, but don't consume next token unless it really is the end */
1443         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1444 
1445         if (token == TOK_CLOSE_BRACE)
1446         {
1447             /* it's the end, consume the close brace */
1448             getToken(state, NULL, NULL, NULL, status);
1449             if (!readToken) {
1450                 warning(startline, "Encountered empty int vector");
1451             }
1452             ustr_deinit(&memberComments);
1453             return result;
1454         }
1455 
1456         string = getInvariantString(state, NULL, NULL, status);
1457 
1458         if (U_FAILURE(*status))
1459         {
1460             res_close(result);
1461             return NULL;
1462         }
1463 
1464         /* For handling illegal char in the Intvector */
1465         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1466         len=(uint32_t)(stopstring-string);
1467 
1468         if(len==uprv_strlen(string))
1469         {
1470             intvector_add(result, value, status);
1471             uprv_free(string);
1472             token = peekToken(state, 0, NULL, NULL, NULL, status);
1473         }
1474         else
1475         {
1476             uprv_free(string);
1477             *status=U_INVALID_CHAR_FOUND;
1478         }
1479 
1480         if (U_FAILURE(*status))
1481         {
1482             res_close(result);
1483             return NULL;
1484         }
1485 
1486         /* the comma is optional (even though it is required to prevent the reader from concatenating
1487         consecutive entries) so that a missing comma on the last entry isn't an error */
1488         if (token == TOK_COMMA)
1489         {
1490             getToken(state, NULL, NULL, NULL, status);
1491         }
1492         readToken = TRUE;
1493     }
1494 
1495     /* not reached */
1496     /* A compiler warning will appear if all paths don't contain a return statement. */
1497 /*    intvector_close(result, status);
1498     *status = U_INTERNAL_PROGRAM_ERROR;
1499     return NULL;*/
1500 }
1501 
1502 static struct SResource *
parseBinary(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1503 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1504 {
1505     struct SResource *result = NULL;
1506     uint8_t          *value;
1507     char             *string;
1508     char              toConv[3] = {'\0', '\0', '\0'};
1509     uint32_t          count;
1510     uint32_t          i;
1511     uint32_t          line;
1512     char             *stopstring;
1513     uint32_t          len;
1514 
1515     string = getInvariantString(state, &line, NULL, status);
1516 
1517     if (string == NULL || U_FAILURE(*status))
1518     {
1519         return NULL;
1520     }
1521 
1522     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1523 
1524     if (U_FAILURE(*status))
1525     {
1526         uprv_free(string);
1527         return NULL;
1528     }
1529 
1530     if(isVerbose()){
1531         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1532     }
1533 
1534     count = (uint32_t)uprv_strlen(string);
1535     if (count > 0){
1536         if((count % 2)==0){
1537             value = static_cast<uint8_t *>(uprv_malloc(sizeof(uint8_t) * count));
1538 
1539             if (value == NULL)
1540             {
1541                 uprv_free(string);
1542                 *status = U_MEMORY_ALLOCATION_ERROR;
1543                 return NULL;
1544             }
1545 
1546             for (i = 0; i < count; i += 2)
1547             {
1548                 toConv[0] = string[i];
1549                 toConv[1] = string[i + 1];
1550 
1551                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1552                 len=(uint32_t)(stopstring-toConv);
1553 
1554                 if(len!=uprv_strlen(toConv))
1555                 {
1556                     uprv_free(string);
1557                     *status=U_INVALID_CHAR_FOUND;
1558                     return NULL;
1559                 }
1560             }
1561 
1562             result = bin_open(state->bundle, tag, (i >> 1), value,NULL, comment, status);
1563 
1564             uprv_free(value);
1565         }
1566         else
1567         {
1568             *status = U_INVALID_CHAR_FOUND;
1569             uprv_free(string);
1570             error(line, "Encountered invalid binary string");
1571             return NULL;
1572         }
1573     }
1574     else
1575     {
1576         result = bin_open(state->bundle, tag, 0, NULL, "",comment,status);
1577         warning(startline, "Encountered empty binary tag");
1578     }
1579     uprv_free(string);
1580 
1581     return result;
1582 }
1583 
1584 static struct SResource *
parseInteger(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1585 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1586 {
1587     struct SResource *result = NULL;
1588     int32_t           value;
1589     char             *string;
1590     char             *stopstring;
1591     uint32_t          len;
1592 
1593     string = getInvariantString(state, NULL, NULL, status);
1594 
1595     if (string == NULL || U_FAILURE(*status))
1596     {
1597         return NULL;
1598     }
1599 
1600     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1601 
1602     if (U_FAILURE(*status))
1603     {
1604         uprv_free(string);
1605         return NULL;
1606     }
1607 
1608     if(isVerbose()){
1609         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1610     }
1611 
1612     if (uprv_strlen(string) <= 0)
1613     {
1614         warning(startline, "Encountered empty integer. Default value is 0.");
1615     }
1616 
1617     /* Allow integer support for hexdecimal, octal digit and decimal*/
1618     /* and handle illegal char in the integer*/
1619     value = uprv_strtoul(string, &stopstring, 0);
1620     len=(uint32_t)(stopstring-string);
1621     if(len==uprv_strlen(string))
1622     {
1623         result = int_open(state->bundle, tag, value, comment, status);
1624     }
1625     else
1626     {
1627         *status=U_INVALID_CHAR_FOUND;
1628     }
1629     uprv_free(string);
1630 
1631     return result;
1632 }
1633 
1634 static struct SResource *
parseImport(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1635 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1636 {
1637     struct SResource *result;
1638     FileStream       *file;
1639     int32_t           len;
1640     uint8_t          *data;
1641     char             *filename;
1642     uint32_t          line;
1643     char     *fullname = NULL;
1644     filename = getInvariantString(state, &line, NULL, status);
1645 
1646     if (U_FAILURE(*status))
1647     {
1648         return NULL;
1649     }
1650 
1651     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1652 
1653     if (U_FAILURE(*status))
1654     {
1655         uprv_free(filename);
1656         return NULL;
1657     }
1658 
1659     if(isVerbose()){
1660         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1661     }
1662 
1663     /* Open the input file for reading */
1664     if (state->inputdir == NULL)
1665     {
1666 #if 1
1667         /*
1668          * Always save file file name, even if there's
1669          * no input directory specified. MIGHT BREAK SOMETHING
1670          */
1671         int32_t filenameLength = uprv_strlen(filename);
1672 
1673         fullname = (char *) uprv_malloc(filenameLength + 1);
1674         uprv_strcpy(fullname, filename);
1675 #endif
1676 
1677         file = T_FileStream_open(filename, "rb");
1678     }
1679     else
1680     {
1681 
1682         int32_t  count     = (int32_t)uprv_strlen(filename);
1683 
1684         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1685         {
1686             fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1687 
1688             /* test for NULL */
1689             if(fullname == NULL)
1690             {
1691                 *status = U_MEMORY_ALLOCATION_ERROR;
1692                 return NULL;
1693             }
1694 
1695             uprv_strcpy(fullname, state->inputdir);
1696 
1697             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1698             fullname[state->inputdirLength + 1] = '\0';
1699 
1700             uprv_strcat(fullname, filename);
1701         }
1702         else
1703         {
1704             fullname = (char *) uprv_malloc(state->inputdirLength + count + 1);
1705 
1706             /* test for NULL */
1707             if(fullname == NULL)
1708             {
1709                 *status = U_MEMORY_ALLOCATION_ERROR;
1710                 return NULL;
1711             }
1712 
1713             uprv_strcpy(fullname, state->inputdir);
1714             uprv_strcat(fullname, filename);
1715         }
1716 
1717         file = T_FileStream_open(fullname, "rb");
1718 
1719     }
1720 
1721     if (file == NULL)
1722     {
1723         error(line, "couldn't open input file %s", filename);
1724         *status = U_FILE_ACCESS_ERROR;
1725         return NULL;
1726     }
1727 
1728     len  = T_FileStream_size(file);
1729     data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1730     /* test for NULL */
1731     if(data == NULL)
1732     {
1733         *status = U_MEMORY_ALLOCATION_ERROR;
1734         T_FileStream_close (file);
1735         return NULL;
1736     }
1737 
1738     /* int32_t numRead = */ T_FileStream_read  (file, data, len);
1739     T_FileStream_close (file);
1740 
1741     result = bin_open(state->bundle, tag, len, data, fullname, comment, status);
1742 
1743     uprv_free(data);
1744     uprv_free(filename);
1745     uprv_free(fullname);
1746 
1747     return result;
1748 }
1749 
1750 static struct SResource *
parseInclude(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1751 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1752 {
1753     struct SResource *result;
1754     int32_t           len=0;
1755     char             *filename;
1756     uint32_t          line;
1757     UChar *pTarget     = NULL;
1758 
1759     UCHARBUF *ucbuf;
1760     char     *fullname = NULL;
1761     int32_t  count     = 0;
1762     const char* cp = NULL;
1763     const UChar* uBuffer = NULL;
1764 
1765     filename = getInvariantString(state, &line, NULL, status);
1766     count     = (int32_t)uprv_strlen(filename);
1767 
1768     if (U_FAILURE(*status))
1769     {
1770         return NULL;
1771     }
1772 
1773     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1774 
1775     if (U_FAILURE(*status))
1776     {
1777         uprv_free(filename);
1778         return NULL;
1779     }
1780 
1781     if(isVerbose()){
1782         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1783     }
1784 
1785     fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1786     /* test for NULL */
1787     if(fullname == NULL)
1788     {
1789         *status = U_MEMORY_ALLOCATION_ERROR;
1790         uprv_free(filename);
1791         return NULL;
1792     }
1793 
1794     if(state->inputdir!=NULL){
1795         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1796         {
1797 
1798             uprv_strcpy(fullname, state->inputdir);
1799 
1800             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1801             fullname[state->inputdirLength + 1] = '\0';
1802 
1803             uprv_strcat(fullname, filename);
1804         }
1805         else
1806         {
1807             uprv_strcpy(fullname, state->inputdir);
1808             uprv_strcat(fullname, filename);
1809         }
1810     }else{
1811         uprv_strcpy(fullname,filename);
1812     }
1813 
1814     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1815 
1816     if (U_FAILURE(*status)) {
1817         error(line, "couldn't open input file %s\n", filename);
1818         return NULL;
1819     }
1820 
1821     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1822     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1823 
1824     ucbuf_close(ucbuf);
1825 
1826     uprv_free(pTarget);
1827 
1828     uprv_free(filename);
1829     uprv_free(fullname);
1830 
1831     return result;
1832 }
1833 
1834 
1835 
1836 
1837 
/*
 * UChar string constants for the resource type keywords that may follow ':'
 * in a resource declaration. parseResourceType() compares the type token
 * against these. Every U_STRING_DECL here has a matching U_STRING_INIT in
 * initParser() that repeats the same literal and length.
 *
 * k_type_bin and k_type_int have no table entry of their own;
 * parseResourceType() treats them as aliases for binary and integer.
 */
1838 U_STRING_DECL(k_type_string,    "string",    6);
1839 U_STRING_DECL(k_type_binary,    "binary",    6);
1840 U_STRING_DECL(k_type_bin,       "bin",       3);
1841 U_STRING_DECL(k_type_table,     "table",     5);
1842 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
1843 U_STRING_DECL(k_type_int,       "int",       3);
1844 U_STRING_DECL(k_type_integer,   "integer",   7);
1845 U_STRING_DECL(k_type_array,     "array",     5);
1846 U_STRING_DECL(k_type_alias,     "alias",     5);
1847 U_STRING_DECL(k_type_intvector, "intvector", 9);
1848 U_STRING_DECL(k_type_import,    "import",    6);
1849 U_STRING_DECL(k_type_include,   "include",   7);
1850 
1851 /* Various non-standard processing plugins that create one or more special resources. */
1852 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1853 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
1854 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
1855 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1856 
/*
 * Resource types the parser distinguishes. The enumerators are used directly
 * as indexes into gResourceTypes[], so the two must list their entries in the
 * same order.
 *
 * RESTYPE_UNKNOWN is the "no explicit type" value: parseResource() starts
 * from it and guesses the type from lookahead tokens when no ':' type was
 * given. RESTYPE_RESERVED is the end marker: parseResourceType() stops its
 * name search there and reports an unknown type.
 */
1857 typedef enum EResourceType
1858 {
1859     RESTYPE_UNKNOWN,
1860     RESTYPE_STRING,
1861     RESTYPE_BINARY,
1862     RESTYPE_TABLE,
1863     RESTYPE_TABLE_NO_FALLBACK,
1864     RESTYPE_INTEGER,
1865     RESTYPE_ARRAY,
1866     RESTYPE_ALIAS,
1867     RESTYPE_INTVECTOR,
1868     RESTYPE_IMPORT,
1869     RESTYPE_INCLUDE,
1870     RESTYPE_PROCESS_UCA_RULES,
1871     RESTYPE_PROCESS_COLLATION,
1872     RESTYPE_PROCESS_TRANSLITERATOR,
1873     RESTYPE_PROCESS_DEPENDENCY,
1874     RESTYPE_RESERVED
1875 } EResourceType;
1876 
/*
 * Per-type dispatch table, indexed by EResourceType (entry order must match
 * the enum).
 *
 *   nameChars     - char form of the type name; parseResource() prints it in
 *                   its error messages.
 *   nameUChars    - UChar form that parseResourceType() compares the type
 *                   token against; NULL for the two boundary entries, which
 *                   the name search never visits.
 *   parseFunction - parser for a resource of this type. When it is NULL,
 *                   parseResource() reports an internal "not handled" error
 *                   instead of dispatching.
 */
1877 static struct {
1878     const char *nameChars;   /* only used for debugging */
1879     const UChar *nameUChars;
1880     ParseResourceFunction *parseFunction;
1881 } gResourceTypes[] = {
1882     {"Unknown", NULL, NULL},
1883     {"string", k_type_string, parseString},
1884     {"binary", k_type_binary, parseBinary},
1885     {"table", k_type_table, parseTable},
1886     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1887     {"integer", k_type_integer, parseInteger},
1888     {"array", k_type_array, parseArray},
1889     {"alias", k_type_alias, parseAlias},
1890     {"intvector", k_type_intvector, parseIntVector},
1891     {"import", k_type_import, parseImport},
1892     {"include", k_type_include, parseInclude},
1893     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1894     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1895     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1896     {"process(dependency)", k_type_plugin_dependency, parseDependency},
1897     {"reserved", NULL, NULL}
1898 };
1899 
/*
 * Initialize the k_type_* UChar constants declared with U_STRING_DECL above.
 * Each U_STRING_INIT must repeat the literal and length of its declaration.
 * NOTE(review): presumably this has to run before parseResourceType() compares
 * tokens against these constants - confirm against the caller.
 */
initParser()1900 void initParser()
1901 {
1902     U_STRING_INIT(k_type_string,    "string",    6);
1903     U_STRING_INIT(k_type_binary,    "binary",    6);
1904     U_STRING_INIT(k_type_bin,       "bin",       3);
1905     U_STRING_INIT(k_type_table,     "table",     5);
1906     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
1907     U_STRING_INIT(k_type_int,       "int",       3);
1908     U_STRING_INIT(k_type_integer,   "integer",   7);
1909     U_STRING_INIT(k_type_array,     "array",     5);
1910     U_STRING_INIT(k_type_alias,     "alias",     5);
1911     U_STRING_INIT(k_type_intvector, "intvector", 9);
1912     U_STRING_INIT(k_type_import,    "import",    6);
1913     U_STRING_INIT(k_type_include,   "include",   7);
1914 
    /* Plugin ("process(...)") type names. */
1915     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1916     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
1917     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
1918     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
1919 }
1920 
isTable(enum EResourceType type)1921 static inline UBool isTable(enum EResourceType type) {
1922     return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1923 }
1924 
1925 static enum EResourceType
parseResourceType(ParseState * state,UErrorCode * status)1926 parseResourceType(ParseState* state, UErrorCode *status)
1927 {
1928     struct UString        *tokenValue;
1929     struct UString        comment;
1930     enum   EResourceType  result = RESTYPE_UNKNOWN;
1931     uint32_t              line=0;
1932     ustr_init(&comment);
1933     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1934 
1935     if (U_FAILURE(*status))
1936     {
1937         return RESTYPE_UNKNOWN;
1938     }
1939 
1940     *status = U_ZERO_ERROR;
1941 
1942     /* Search for normal types */
1943     result=RESTYPE_UNKNOWN;
1944     while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1945         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1946             break;
1947         }
1948     }
1949     /* Now search for the aliases */
1950     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1951         result = RESTYPE_INTEGER;
1952     }
1953     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1954         result = RESTYPE_BINARY;
1955     }
1956     else if (result == RESTYPE_RESERVED) {
1957         char tokenBuffer[1024];
1958         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1959         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1960         *status = U_INVALID_FORMAT_ERROR;
1961         error(line, "unknown resource type '%s'", tokenBuffer);
1962     }
1963 
1964     return result;
1965 }
1966 
1967 /* parse a non-top-level resource */
1968 static struct SResource *
parseResource(ParseState * state,char * tag,const struct UString * comment,UErrorCode * status)1969 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1970 {
1971     enum   ETokenType      token;
1972     enum   EResourceType  resType = RESTYPE_UNKNOWN;
1973     ParseResourceFunction *parseFunction = NULL;
1974     struct UString        *tokenValue;
1975     uint32_t                 startline;
1976     uint32_t                 line;
1977 
1978 
1979     token = getToken(state, &tokenValue, NULL, &startline, status);
1980 
1981     if(isVerbose()){
1982         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1983     }
1984 
1985     /* name . [ ':' type ] '{' resource '}' */
1986     /* This function parses from the colon onwards.  If the colon is present, parse the
1987     type then try to parse a resource of that type.  If there is no explicit type,
1988     work it out using the lookahead tokens. */
1989     switch (token)
1990     {
1991     case TOK_EOF:
1992         *status = U_INVALID_FORMAT_ERROR;
1993         error(startline, "Unexpected EOF encountered");
1994         return NULL;
1995 
1996     case TOK_ERROR:
1997         *status = U_INVALID_FORMAT_ERROR;
1998         return NULL;
1999 
2000     case TOK_COLON:
2001         resType = parseResourceType(state, status);
2002         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
2003 
2004         if (U_FAILURE(*status))
2005         {
2006             return NULL;
2007         }
2008 
2009         break;
2010 
2011     case TOK_OPEN_BRACE:
2012         break;
2013 
2014     default:
2015         *status = U_INVALID_FORMAT_ERROR;
2016         error(startline, "syntax error while reading a resource, expected '{' or ':'");
2017         return NULL;
2018     }
2019 
2020 
2021     if (resType == RESTYPE_UNKNOWN)
2022     {
2023         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
2024         We could have any of the following:
2025         { {         => array (nested)
2026         { :/}       => array
2027         { string ,  => string array
2028 
2029         { string {  => table
2030 
2031         { string :/{    => table
2032         { string }      => string
2033         */
2034 
2035         token = peekToken(state, 0, NULL, &line, NULL,status);
2036 
2037         if (U_FAILURE(*status))
2038         {
2039             return NULL;
2040         }
2041 
2042         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
2043         {
2044             resType = RESTYPE_ARRAY;
2045         }
2046         else if (token == TOK_STRING)
2047         {
2048             token = peekToken(state, 1, NULL, &line, NULL, status);
2049 
2050             if (U_FAILURE(*status))
2051             {
2052                 return NULL;
2053             }
2054 
2055             switch (token)
2056             {
2057             case TOK_COMMA:         resType = RESTYPE_ARRAY;  break;
2058             case TOK_OPEN_BRACE:    resType = RESTYPE_TABLE;  break;
2059             case TOK_CLOSE_BRACE:   resType = RESTYPE_STRING; break;
2060             case TOK_COLON:         resType = RESTYPE_TABLE;  break;
2061             default:
2062                 *status = U_INVALID_FORMAT_ERROR;
2063                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
2064                 return NULL;
2065             }
2066         }
2067         else
2068         {
2069             *status = U_INVALID_FORMAT_ERROR;
2070             error(line, "Unexpected token after '{'");
2071             return NULL;
2072         }
2073 
2074         /* printf("Type guessed as %s\n", resourceNames[resType]); */
2075     } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
2076         *status = U_INVALID_FORMAT_ERROR;
2077         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
2078         return NULL;
2079     }
2080 
2081 
2082     /* We should now know what we need to parse next, so call the appropriate parser
2083     function and return. */
2084     parseFunction = gResourceTypes[resType].parseFunction;
2085     if (parseFunction != NULL) {
2086         return parseFunction(state, tag, startline, comment, status);
2087     }
2088     else {
2089         *status = U_INTERNAL_PROGRAM_ERROR;
2090         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
2091     }
2092 
2093     return NULL;
2094 }
2095 
2096 /* parse the top-level resource */
2097 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,const char * filename,UBool makeBinaryCollation,UBool omitCollationRules,UErrorCode * status)2098 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
2099       UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
2100 {
2101     struct UString    *tokenValue;
2102     struct UString    comment;
2103     uint32_t           line;
2104     enum EResourceType bundleType;
2105     enum ETokenType    token;
2106     ParseState state;
2107     uint32_t i;
2108 
2109 
2110     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
2111     {
2112         ustr_init(&state.lookahead[i].value);
2113         ustr_init(&state.lookahead[i].comment);
2114     }
2115 
2116     initLookahead(&state, buf, status);
2117 
2118     state.inputdir       = inputDir;
2119     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
2120     state.outputdir       = outputDir;
2121     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
2122     state.filename = filename;
2123     state.makeBinaryCollation = makeBinaryCollation;
2124     state.omitCollationRules = omitCollationRules;
2125 
2126     ustr_init(&comment);
2127     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
2128 
2129     state.bundle = bundle_open(&comment, FALSE, status);
2130 
2131     if (state.bundle == NULL || U_FAILURE(*status))
2132     {
2133         return NULL;
2134     }
2135 
2136 
2137     bundle_setlocale(state.bundle, tokenValue->fChars, status);
2138 
2139     /* The following code is to make Empty bundle work no matter with :table specifer or not */
2140     token = getToken(&state, NULL, NULL, &line, status);
2141     if(token==TOK_COLON) {
2142         *status=U_ZERO_ERROR;
2143         bundleType=parseResourceType(&state, status);
2144 
2145         if(isTable(bundleType))
2146         {
2147             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2148         }
2149         else
2150         {
2151             *status=U_PARSE_ERROR;
2152              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2153         }
2154     }
2155     else
2156     {
2157         /* not a colon */
2158         if(token==TOK_OPEN_BRACE)
2159         {
2160             *status=U_ZERO_ERROR;
2161             bundleType=RESTYPE_TABLE;
2162         }
2163         else
2164         {
2165             /* neither colon nor open brace */
2166             *status=U_PARSE_ERROR;
2167             bundleType=RESTYPE_UNKNOWN;
2168             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2169         }
2170     }
2171 
2172     if (U_FAILURE(*status))
2173     {
2174         bundle_close(state.bundle, status);
2175         return NULL;
2176     }
2177 
2178     if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2179         /*
2180          * Parse a top-level table with the table(nofallback) declaration.
2181          * This is the same as a regular table, but also sets the
2182          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2183          */
2184         state.bundle->noFallback=TRUE;
2185     }
2186     /* top-level tables need not handle special table names like "collations" */
2187     realParseTable(&state, state.bundle->fRoot, NULL, line, status);
2188     if(dependencyArray!=NULL){
2189         table_add(state.bundle->fRoot, dependencyArray, 0, status);
2190         dependencyArray = NULL;
2191     }
2192    if (U_FAILURE(*status))
2193     {
2194         bundle_close(state.bundle, status);
2195         res_close(dependencyArray);
2196         return NULL;
2197     }
2198 
2199     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2200     {
2201         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2202         if(isStrict()){
2203             *status = U_INVALID_FORMAT_ERROR;
2204             return NULL;
2205         }
2206     }
2207 
2208     cleanupLookahead(&state);
2209     ustr_deinit(&comment);
2210     return state.bundle;
2211 }
2212