• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1998-2008, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.c
10 *
11 * Modification History:
12 *
13 *   Date          Name          Description
14 *   05/26/99     stephen       Creation.
15 *   02/25/00     weiv          Overhaul to write udata
16 *   5/10/01      Ram           removed ustdio dependency
17 *   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20 
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "unicode/ustring.h"
33 #include "unicode/putil.h"
34 #include <stdio.h>
35 
/* Depth of the read-ahead window kept beyond the current token */
#define MAX_LOOKAHEAD   3

/* Code points that receive special treatment while reading rule files */
#define CR               0x000D  /* carriage return */
#define LF               0x000A  /* line feed */
#define SPACE            0x0020  /* ' ' */
#define TAB              0x0009  /* horizontal tab */
#define ESCAPE           0x005C  /* '\\' */
#define HASH             0x0023  /* '#' */
#define QUOTE            0x0027  /* '\'' */
#define ZERO             0x0030  /* '0' */
#define STARTCOMMAND     0x005B  /* '[' */
#define ENDCOMMAND       0x005D  /* ']' */
#define OPENSQBRACKET    0x005B  /* '[' (same code point as STARTCOMMAND) */
#define CLOSESQBRACKET   0x005D  /* ']' (same code point as ENDCOMMAND) */
51 
52 typedef struct SResource *
53 ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
54 
55 struct Lookahead
56 {
57      enum   ETokenType type;
58      struct UString    value;
59      struct UString    comment;
60      uint32_t          line;
61 };
62 
63 /* keep in sync with token defines in read.h */
64 const char *tokenNames[TOK_TOKEN_COUNT] =
65 {
66      "string",             /* A string token, such as "MonthNames" */
67      "'{'",                 /* An opening brace character */
68      "'}'",                 /* A closing brace character */
69      "','",                 /* A comma */
70      "':'",                 /* A colon */
71 
72      "<end of file>",     /* End of the file has been reached successfully */
73      "<end of line>"
74 };
75 
76 /* Just to store "TRUE" */
77 static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
78 
79 static struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
80 static uint32_t          lookaheadPosition;
81 static UCHARBUF         *buffer;
82 
83 static struct SRBRoot *bundle;
84 static const char     *inputdir;
85 static uint32_t        inputdirLength;
86 static const char     *outputdir;
87 static uint32_t        outputdirLength;
88 
89 static UBool gMakeBinaryCollation = TRUE;
90 static UBool gOmitCollationRules  = FALSE;
91 
92 static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
93 
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is refilled with the next token from the reader by calling getNextToken.
   The token values are stored in the slots themselves, which means that a token
   value does not survive a further call to getToken, i.e.

   struct UString *value;

   getToken(&value, NULL, NULL, status);
   getToken(NULL,   NULL, NULL, status);       bad - value is now a different string
*/
107 static void
initLookahead(UCHARBUF * buf,UErrorCode * status)108 initLookahead(UCHARBUF *buf, UErrorCode *status)
109 {
110     static uint32_t initTypeStrings = 0;
111     uint32_t i;
112 
113     if (!initTypeStrings)
114     {
115         initTypeStrings = 1;
116     }
117 
118     lookaheadPosition   = 0;
119     buffer              = buf;
120 
121     resetLineNumber();
122 
123     for (i = 0; i < MAX_LOOKAHEAD; i++)
124     {
125         lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
126         if (U_FAILURE(*status))
127         {
128             return;
129         }
130     }
131 
132     *status = U_ZERO_ERROR;
133 }
134 
135 static void
cleanupLookahead()136 cleanupLookahead()
137 {
138     uint32_t i;
139     for (i = 0; i < MAX_LOOKAHEAD; i++)
140     {
141         ustr_deinit(&lookahead[i].value);
142         ustr_deinit(&lookahead[i].comment);
143     }
144 
145 }
146 
147 static enum ETokenType
getToken(struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)148 getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
149 {
150     enum ETokenType result;
151     uint32_t          i;
152 
153     result = lookahead[lookaheadPosition].type;
154 
155     if (tokenValue != NULL)
156     {
157         *tokenValue = &lookahead[lookaheadPosition].value;
158     }
159 
160     if (linenumber != NULL)
161     {
162         *linenumber = lookahead[lookaheadPosition].line;
163     }
164 
165     if (comment != NULL)
166     {
167         ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
168     }
169 
170     i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
171     lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
172     ustr_setlen(&lookahead[i].comment, 0, status);
173     ustr_setlen(&lookahead[i].value, 0, status);
174     lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
175 
176     /* printf("getToken, returning %s\n", tokenNames[result]); */
177 
178     return result;
179 }
180 
181 static enum ETokenType
peekToken(uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)182 peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
183 {
184     uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
185 
186     if (U_FAILURE(*status))
187     {
188         return TOK_ERROR;
189     }
190 
191     if (lookaheadCount >= MAX_LOOKAHEAD)
192     {
193         *status = U_INTERNAL_PROGRAM_ERROR;
194         return TOK_ERROR;
195     }
196 
197     if (tokenValue != NULL)
198     {
199         *tokenValue = &lookahead[i].value;
200     }
201 
202     if (linenumber != NULL)
203     {
204         *linenumber = lookahead[i].line;
205     }
206 
207     if(comment != NULL){
208         ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
209     }
210 
211     return lookahead[i].type;
212 }
213 
214 static void
expect(enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)215 expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
216 {
217     uint32_t        line;
218 
219     enum ETokenType token = getToken(tokenValue, comment, &line, status);
220 
221     if (linenumber != NULL)
222     {
223         *linenumber = line;
224     }
225 
226     if (U_FAILURE(*status))
227     {
228         return;
229     }
230 
231     if (token != expectedToken)
232     {
233         *status = U_INVALID_FORMAT_ERROR;
234         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
235     }
236     else
237     {
238         *status = U_ZERO_ERROR;
239     }
240 }
241 
/*
 * Read a string token and return it as a newly allocated char string.
 *
 * line     if non-NULL, receives the token's line number
 * comment  if non-NULL, receives the token's comment
 * status   set to U_INVALID_FORMAT_ERROR when the token contains
 *          non-invariant characters, U_MEMORY_ALLOCATION_ERROR when the
 *          result cannot be allocated
 *
 * Returns a uprv_malloc()ed, NUL-terminated string owned by the caller,
 * or NULL on failure.
 */
static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status)
{
    struct UString *tokenValue;
    char           *result;
    uint32_t        count;

    expect(TOK_STRING, &tokenValue, comment, line, status);

    if (U_FAILURE(*status))
    {
        return NULL;
    }

    count = u_strlen(tokenValue->fChars);
    if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
        *status = U_INVALID_FORMAT_ERROR;
        /* `line` is optional (expect() accepts NULL), so do not dereference it blindly */
        error((line == NULL) ? 0 : *line, "invariant characters required for table keys, binary data, etc.");
        return NULL;
    }

    result = uprv_malloc(count+1);

    if (result == NULL)
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    /* count+1 so that the terminating NUL is converted as well */
    u_UCharsToChars(tokenValue->fChars, result, count+1);
    return result;
}
273 
274 static struct SResource *
parseUCARules(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)275 parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
276 {
277     struct SResource *result = NULL;
278     struct UString   *tokenValue;
279     FileStream       *file          = NULL;
280     char              filename[256] = { '\0' };
281     char              cs[128]       = { '\0' };
282     uint32_t          line;
283     int               len=0;
284     UBool quoted = FALSE;
285     UCHARBUF *ucbuf=NULL;
286     UChar32   c     = 0;
287     const char* cp  = NULL;
288     UChar *pTarget     = NULL;
289     UChar *target      = NULL;
290     UChar *targetLimit = NULL;
291     int32_t size = 0;
292 
293     expect(TOK_STRING, &tokenValue, NULL, &line, status);
294 
295     if(isVerbose()){
296         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
297     }
298 
299     if (U_FAILURE(*status))
300     {
301         return NULL;
302     }
303     /* make the filename including the directory */
304     if (inputdir != NULL)
305     {
306         uprv_strcat(filename, inputdir);
307 
308         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
309         {
310             uprv_strcat(filename, U_FILE_SEP_STRING);
311         }
312     }
313 
314     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
315 
316     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
317 
318     if (U_FAILURE(*status))
319     {
320         return NULL;
321     }
322     uprv_strcat(filename, cs);
323 
324     if(gOmitCollationRules) {
325         return res_none();
326     }
327 
328     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
329 
330     if (U_FAILURE(*status)) {
331         error(line, "An error occured while opening the input file %s\n", filename);
332         return NULL;
333     }
334 
335     /* We allocate more space than actually required
336     * since the actual size needed for storing UChars
337     * is not known in UTF-8 byte stream
338     */
339     size        = ucbuf_size(ucbuf) + 1;
340     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
341     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
342     target      = pTarget;
343     targetLimit = pTarget+size;
344 
345     /* read the rules into the buffer */
346     while (target < targetLimit)
347     {
348         c = ucbuf_getc(ucbuf, status);
349         if(c == QUOTE) {
350             quoted = (UBool)!quoted;
351         }
352         /* weiv (06/26/2002): adding the following:
353          * - preserving spaces in commands [...]
354          * - # comments until the end of line
355          */
356         if (c == STARTCOMMAND && !quoted)
357         {
358             /* preserve commands
359              * closing bracket will be handled by the
360              * append at the end of the loop
361              */
362             while(c != ENDCOMMAND) {
363                 U_APPEND_CHAR32(c, target,len);
364                 c = ucbuf_getc(ucbuf, status);
365             }
366         }
367         else if (c == HASH && !quoted) {
368             /* skip comments */
369             while(c != CR && c != LF) {
370                 c = ucbuf_getc(ucbuf, status);
371             }
372             continue;
373         }
374         else if (c == ESCAPE)
375         {
376             c = unescape(ucbuf, status);
377 
378             if (c == U_ERR)
379             {
380                 uprv_free(pTarget);
381                 T_FileStream_close(file);
382                 return NULL;
383             }
384         }
385         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
386         {
387             /* ignore spaces carriage returns
388             * and line feed unless in the form \uXXXX
389             */
390             continue;
391         }
392 
393         /* Append UChar * after dissembling if c > 0xffff*/
394         if (c != U_EOF)
395         {
396             U_APPEND_CHAR32(c, target,len);
397         }
398         else
399         {
400             break;
401         }
402     }
403 
404     /* terminate the string */
405     if(target < targetLimit){
406         *target = 0x0000;
407     }
408 
409     result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
410 
411 
412     ucbuf_close(ucbuf);
413     uprv_free(pTarget);
414     T_FileStream_close(file);
415 
416     return result;
417 }
418 
419 static struct SResource *
parseTransliterator(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)420 parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
421 {
422     struct SResource *result = NULL;
423     struct UString   *tokenValue;
424     FileStream       *file          = NULL;
425     char              filename[256] = { '\0' };
426     char              cs[128]       = { '\0' };
427     uint32_t          line;
428     UCHARBUF *ucbuf=NULL;
429     const char* cp  = NULL;
430     UChar *pTarget     = NULL;
431     const UChar *pSource     = NULL;
432     int32_t size = 0;
433 
434     expect(TOK_STRING, &tokenValue, NULL, &line, status);
435 
436     if(isVerbose()){
437         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
438     }
439 
440     if (U_FAILURE(*status))
441     {
442         return NULL;
443     }
444     /* make the filename including the directory */
445     if (inputdir != NULL)
446     {
447         uprv_strcat(filename, inputdir);
448 
449         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
450         {
451             uprv_strcat(filename, U_FILE_SEP_STRING);
452         }
453     }
454 
455     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
456 
457     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
458 
459     if (U_FAILURE(*status))
460     {
461         return NULL;
462     }
463     uprv_strcat(filename, cs);
464 
465 
466     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
467 
468     if (U_FAILURE(*status)) {
469         error(line, "An error occured while opening the input file %s\n", filename);
470         return NULL;
471     }
472 
473     /* We allocate more space than actually required
474     * since the actual size needed for storing UChars
475     * is not known in UTF-8 byte stream
476     */
477     pSource = ucbuf_getBuffer(ucbuf, &size, status);
478     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
479     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
480 
481 #if !UCONFIG_NO_TRANSLITERATION
482     size = utrans_stripRules(pSource, size, pTarget, status);
483 #else
484     size = 0;
485     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
486 #endif
487     result = string_open(bundle, tag, pTarget, size, NULL, status);
488 
489     ucbuf_close(ucbuf);
490     uprv_free(pTarget);
491     T_FileStream_close(file);
492 
493     return result;
494 }
495 static struct SResource* dependencyArray = NULL;
496 
497 static struct SResource *
parseDependency(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)498 parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
499 {
500     struct SResource *result = NULL;
501     struct SResource *elem = NULL;
502     struct UString   *tokenValue;
503     uint32_t          line;
504     char              filename[256] = { '\0' };
505     char              cs[128]       = { '\0' };
506 
507     expect(TOK_STRING, &tokenValue, NULL, &line, status);
508 
509     if(isVerbose()){
510         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
511     }
512 
513     if (U_FAILURE(*status))
514     {
515         return NULL;
516     }
517     /* make the filename including the directory */
518     if (outputdir != NULL)
519     {
520         uprv_strcat(filename, outputdir);
521 
522         if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR)
523         {
524             uprv_strcat(filename, U_FILE_SEP_STRING);
525         }
526     }
527 
528     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
529 
530     if (U_FAILURE(*status))
531     {
532         return NULL;
533     }
534     uprv_strcat(filename, cs);
535     if(!T_FileStream_file_exists(filename)){
536         if(isStrict()){
537             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
538         }else{
539             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
540         }
541     }
542     if(dependencyArray==NULL){
543         dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status);
544     }
545     if(tag!=NULL){
546         result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
547     }
548     elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
549 
550     array_add(dependencyArray, elem, status);
551 
552     if (U_FAILURE(*status))
553     {
554         return NULL;
555     }
556     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
557     return result;
558 }
559 static struct SResource *
parseString(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)560 parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
561 {
562     struct UString   *tokenValue;
563     struct SResource *result = NULL;
564 
565 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
566     {
567         return parseUCARules(tag, startline, status);
568     }*/
569     if(isVerbose()){
570         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
571     }
572     expect(TOK_STRING, &tokenValue, NULL, NULL, status);
573 
574     if (U_SUCCESS(*status))
575     {
576         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
577         doesn't survive expect either) */
578 
579         result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
580         if(U_SUCCESS(*status) && result) {
581             expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
582 
583             if (U_FAILURE(*status))
584             {
585                 res_close(result);
586                 return NULL;
587             }
588         }
589     }
590 
591     return result;
592 }
593 
594 static struct SResource *
parseAlias(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)595 parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
596 {
597     struct UString   *tokenValue;
598     struct SResource *result  = NULL;
599 
600     expect(TOK_STRING, &tokenValue, NULL, NULL, status);
601 
602     if(isVerbose()){
603         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
604     }
605 
606     if (U_SUCCESS(*status))
607     {
608         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
609         doesn't survive expect either) */
610 
611         result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
612 
613         expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
614 
615         if (U_FAILURE(*status))
616         {
617             res_close(result);
618             return NULL;
619         }
620     }
621 
622     return result;
623 }
624 
625 static struct SResource *
addCollation(struct SResource * result,uint32_t startline,UErrorCode * status)626 addCollation(struct SResource  *result, uint32_t startline, UErrorCode *status)
627 {
628     struct SResource  *member = NULL;
629     struct UString    *tokenValue;
630     struct UString     comment;
631     enum   ETokenType  token;
632     char               subtag[1024];
633     UVersionInfo       version;
634     UBool              override = FALSE;
635     uint32_t           line;
636     /* '{' . (name resource)* '}' */
637     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
638 
639     for (;;)
640     {
641         ustr_init(&comment);
642         token = getToken(&tokenValue, &comment, &line, status);
643 
644         if (token == TOK_CLOSE_BRACE)
645         {
646             return result;
647         }
648 
649         if (token != TOK_STRING)
650         {
651             res_close(result);
652             *status = U_INVALID_FORMAT_ERROR;
653 
654             if (token == TOK_EOF)
655             {
656                 error(startline, "unterminated table");
657             }
658             else
659             {
660                 error(line, "Unexpected token %s", tokenNames[token]);
661             }
662 
663             return NULL;
664         }
665 
666         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
667 
668         if (U_FAILURE(*status))
669         {
670             res_close(result);
671             return NULL;
672         }
673 
674         member = parseResource(subtag, NULL, status);
675 
676         if (U_FAILURE(*status))
677         {
678             res_close(result);
679             return NULL;
680         }
681 
682         if (uprv_strcmp(subtag, "Version") == 0)
683         {
684             char     ver[40];
685             int32_t length = member->u.fString.fLength;
686 
687             if (length >= (int32_t) sizeof(ver))
688             {
689                 length = (int32_t) sizeof(ver) - 1;
690             }
691 
692             u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
693             u_versionFromString(version, ver);
694 
695             table_add(result, member, line, status);
696 
697         }
698         else if (uprv_strcmp(subtag, "Override") == 0)
699         {
700             override = FALSE;
701 
702             if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
703             {
704                 override = TRUE;
705             }
706             table_add(result, member, line, status);
707 
708         }
709         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
710         {
711             /* discard duplicate %%CollationBin if any*/
712         }
713         else if (uprv_strcmp(subtag, "Sequence") == 0)
714         {
715 #if UCONFIG_NO_COLLATION
716             warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
717 #else
718             /* in order to achieve smaller data files, we can direct genrb */
719             /* to omit collation rules */
720             if(!gOmitCollationRules) {
721               /* first we add the "Sequence", so that we always have rules */
722               table_add(result, member, line, status);
723             }
724             if(gMakeBinaryCollation) {
725                 UErrorCode intStatus = U_ZERO_ERROR;
726 
727                 /* do the collation elements */
728                 int32_t     len   = 0;
729                 uint8_t   *data  = NULL;
730                 UCollator *coll  = NULL;
731                 UParseError parseError;
732                 /* add sequence */
733                 /*table_add(result, member, line, status);*/
734 
735                 coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
736                     UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
737 
738                 if (U_SUCCESS(intStatus) && coll != NULL)
739                 {
740                     len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
741                     data = (uint8_t *)uprv_malloc(len);
742                     intStatus = U_ZERO_ERROR;
743                     len = ucol_cloneBinary(coll, data, len, &intStatus);
744                     /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
745 
746                     /* tailoring rules version */
747                     /* This is wrong! */
748                     /*coll->dataInfo.dataVersion[1] = version[0];*/
749                     /* Copy tailoring version. Builder version already */
750                     /* set in ucol_openRules */
751                     ((UCATableHeader *)data)->version[1] = version[0];
752                     ((UCATableHeader *)data)->version[2] = version[1];
753                     ((UCATableHeader *)data)->version[3] = version[2];
754 
755                     if (U_SUCCESS(intStatus) && data != NULL)
756                     {
757                         member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
758                         /*table_add(bundle->fRoot, member, line, status);*/
759                         table_add(result, member, line, status);
760                         uprv_free(data);
761                     }
762                     else
763                     {
764                         warning(line, "could not obtain rules from collator");
765                         if(isStrict()){
766                             *status = U_INVALID_FORMAT_ERROR;
767                             return NULL;
768                         }
769                     }
770 
771                     ucol_close(coll);
772                 }
773                 else
774                 {
775                     warning(line, "%%Collation could not be constructed from CollationElements - check context!");
776                     if(isStrict()){
777                         *status = intStatus;
778                         return NULL;
779                     }
780                 }
781             } else {
782                 if(isVerbose()) {
783                     printf("Not building Collation binary\n");
784                 }
785             }
786 #endif
787         }
788 
789         /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
790 
791         /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
792 
793         if (U_FAILURE(*status))
794         {
795             res_close(result);
796             return NULL;
797         }
798     }
799 
800     /* not reached */
801     /* A compiler warning will appear if all paths don't contain a return statement. */
802 /*    *status = U_INTERNAL_PROGRAM_ERROR;
803     return NULL;*/
804 }
805 
806 static struct SResource *
parseCollationElements(char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)807 parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
808 {
809     struct SResource  *result = NULL;
810     struct SResource  *member = NULL;
811     struct SResource  *collationRes = NULL;
812     struct UString    *tokenValue;
813     struct UString     comment;
814     enum   ETokenType  token;
815     char               subtag[1024], typeKeyword[1024];
816     uint32_t           line;
817 
818     result = table_open(bundle, tag, NULL, status);
819 
820     if (result == NULL || U_FAILURE(*status))
821     {
822         return NULL;
823     }
824     if(isVerbose()){
825         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
826     }
827     if(!newCollation) {
828         return addCollation(result, startline, status);
829     }
830     else {
831         for(;;) {
832             ustr_init(&comment);
833             token = getToken(&tokenValue, &comment, &line, status);
834 
835             if (token == TOK_CLOSE_BRACE)
836             {
837                 return result;
838             }
839 
840             if (token != TOK_STRING)
841             {
842                 res_close(result);
843                 *status = U_INVALID_FORMAT_ERROR;
844 
845                 if (token == TOK_EOF)
846                 {
847                     error(startline, "unterminated table");
848                 }
849                 else
850                 {
851                     error(line, "Unexpected token %s", tokenNames[token]);
852                 }
853 
854                 return NULL;
855             }
856 
857             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
858 
859             if (U_FAILURE(*status))
860             {
861                 res_close(result);
862                 return NULL;
863             }
864 
865             if (uprv_strcmp(subtag, "default") == 0)
866             {
867                 member = parseResource(subtag, NULL, status);
868 
869                 if (U_FAILURE(*status))
870                 {
871                     res_close(result);
872                     return NULL;
873                 }
874 
875                 table_add(result, member, line, status);
876             }
877             else
878             {
879                 token = peekToken(0, &tokenValue, &line, &comment, status);
880                 /* this probably needs to be refactored or recursively use the parser */
881                 /* first we assume that our collation table won't have the explicit type */
882                 /* then, we cannot handle aliases */
883                 if(token == TOK_OPEN_BRACE) {
884                     token = getToken(&tokenValue, &comment, &line, status);
885                     collationRes = table_open(bundle, subtag, NULL, status);
886                     table_add(result, addCollation(collationRes, startline, status), startline, status);
887                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
888                     /* we could have a table too */
889                     token = peekToken(1, &tokenValue, &line, &comment, status);
890                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
891                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
892                         member = parseResource(subtag, NULL, status);
893 
894                         if (U_FAILURE(*status))
895                         {
896                             res_close(result);
897                             return NULL;
898                         }
899 
900                         table_add(result, member, line, status);
901                     } else {
902                         res_close(result);
903                         *status = U_INVALID_FORMAT_ERROR;
904                         return NULL;
905                     }
906                 } else {
907                     res_close(result);
908                     *status = U_INVALID_FORMAT_ERROR;
909                     return NULL;
910                 }
911             }
912 
913             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
914 
915             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
916 
917             if (U_FAILURE(*status))
918             {
919                 res_close(result);
920                 return NULL;
921             }
922         }
923     }
924 }
925 
926 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
927    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
928 static struct SResource *
realParseTable(struct SResource * table,char * tag,uint32_t startline,UErrorCode * status)929 realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
930 {
931     struct SResource  *member = NULL;
932     struct UString    *tokenValue=NULL;
933     struct UString    comment;
934     enum   ETokenType token;
935     char              subtag[1024];
936     uint32_t          line;
937     UBool             readToken = FALSE;
938 
939     /* '{' . (name resource)* '}' */
940     if(isVerbose()){
941         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
942     }
943     for (;;)
944     {
945         ustr_init(&comment);
946         token = getToken(&tokenValue, &comment, &line, status);
947 
948         if (token == TOK_CLOSE_BRACE)
949         {
950             if (!readToken) {
951                 warning(startline, "Encountered empty table");
952             }
953             return table;
954         }
955 
956         if (token != TOK_STRING)
957         {
958             *status = U_INVALID_FORMAT_ERROR;
959 
960             if (token == TOK_EOF)
961             {
962                 error(startline, "unterminated table");
963             }
964             else
965             {
966                 error(line, "unexpected token %s", tokenNames[token]);
967             }
968 
969             return NULL;
970         }
971 
972         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
973             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
974         } else {
975             *status = U_INVALID_FORMAT_ERROR;
976             error(line, "invariant characters required for table keys");
977             return NULL;
978         }
979 
980         if (U_FAILURE(*status))
981         {
982             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
983             return NULL;
984         }
985 
986         member = parseResource(subtag, &comment, status);
987 
988         if (member == NULL || U_FAILURE(*status))
989         {
990             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
991             return NULL;
992         }
993 
994         table_add(table, member, line, status);
995 
996         if (U_FAILURE(*status))
997         {
998             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
999             return NULL;
1000         }
1001         readToken = TRUE;
1002         ustr_deinit(&comment);
1003     }
1004 
1005     /* not reached */
1006     /* A compiler warning will appear if all paths don't contain a return statement. */
1007 /*     *status = U_INTERNAL_PROGRAM_ERROR;
1008      return NULL;*/
1009 }
1010 
1011 static struct SResource *
parseTable(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1012 parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1013 {
1014     struct SResource *result;
1015 
1016     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1017     {
1018         return parseCollationElements(tag, startline, FALSE, status);
1019     }
1020     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1021     {
1022         return parseCollationElements(tag, startline, TRUE, status);
1023     }
1024     if(isVerbose()){
1025         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1026     }
1027 
1028     result = table_open(bundle, tag, comment, status);
1029 
1030     if (result == NULL || U_FAILURE(*status))
1031     {
1032         return NULL;
1033     }
1034 
1035     return realParseTable(result, tag, startline,  status);
1036 }
1037 
1038 static struct SResource *
parseArray(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1039 parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1040 {
1041     struct SResource  *result = NULL;
1042     struct SResource  *member = NULL;
1043     struct UString    *tokenValue;
1044     struct UString    memberComments;
1045     enum   ETokenType token;
1046     UBool             readToken = FALSE;
1047 
1048     result = array_open(bundle, tag, comment, status);
1049 
1050     if (result == NULL || U_FAILURE(*status))
1051     {
1052         return NULL;
1053     }
1054     if(isVerbose()){
1055         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1056     }
1057 
1058     ustr_init(&memberComments);
1059 
1060     /* '{' . resource [','] '}' */
1061     for (;;)
1062     {
1063         /* reset length */
1064         ustr_setlen(&memberComments, 0, status);
1065 
1066         /* check for end of array, but don't consume next token unless it really is the end */
1067         token = peekToken(0, &tokenValue, NULL, &memberComments, status);
1068 
1069 
1070         if (token == TOK_CLOSE_BRACE)
1071         {
1072             getToken(NULL, NULL, NULL, status);
1073             if (!readToken) {
1074                 warning(startline, "Encountered empty array");
1075             }
1076             break;
1077         }
1078 
1079         if (token == TOK_EOF)
1080         {
1081             res_close(result);
1082             *status = U_INVALID_FORMAT_ERROR;
1083             error(startline, "unterminated array");
1084             return NULL;
1085         }
1086 
1087         /* string arrays are a special case */
1088         if (token == TOK_STRING)
1089         {
1090             getToken(&tokenValue, &memberComments, NULL, status);
1091             member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1092         }
1093         else
1094         {
1095             member = parseResource(NULL, &memberComments, status);
1096         }
1097 
1098         if (member == NULL || U_FAILURE(*status))
1099         {
1100             res_close(result);
1101             return NULL;
1102         }
1103 
1104         array_add(result, member, status);
1105 
1106         if (U_FAILURE(*status))
1107         {
1108             res_close(result);
1109             return NULL;
1110         }
1111 
1112         /* eat optional comma if present */
1113         token = peekToken(0, NULL, NULL, NULL, status);
1114 
1115         if (token == TOK_COMMA)
1116         {
1117             getToken(NULL, NULL, NULL, status);
1118         }
1119 
1120         if (U_FAILURE(*status))
1121         {
1122             res_close(result);
1123             return NULL;
1124         }
1125         readToken = TRUE;
1126     }
1127 
1128     ustr_deinit(&memberComments);
1129     return result;
1130 }
1131 
1132 static struct SResource *
parseIntVector(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1133 parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1134 {
1135     struct SResource  *result = NULL;
1136     enum   ETokenType  token;
1137     char              *string;
1138     int32_t            value;
1139     UBool              readToken = FALSE;
1140     char              *stopstring;
1141     uint32_t           len;
1142     struct UString     memberComments;
1143 
1144     result = intvector_open(bundle, tag, comment, status);
1145 
1146     if (result == NULL || U_FAILURE(*status))
1147     {
1148         return NULL;
1149     }
1150 
1151     if(isVerbose()){
1152         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1153     }
1154     ustr_init(&memberComments);
1155     /* '{' . string [','] '}' */
1156     for (;;)
1157     {
1158         ustr_setlen(&memberComments, 0, status);
1159 
1160         /* check for end of array, but don't consume next token unless it really is the end */
1161         token = peekToken(0, NULL, NULL,&memberComments, status);
1162 
1163         if (token == TOK_CLOSE_BRACE)
1164         {
1165             /* it's the end, consume the close brace */
1166             getToken(NULL, NULL, NULL, status);
1167             if (!readToken) {
1168                 warning(startline, "Encountered empty int vector");
1169             }
1170             ustr_deinit(&memberComments);
1171             return result;
1172         }
1173 
1174         string = getInvariantString(NULL, NULL, status);
1175 
1176         if (U_FAILURE(*status))
1177         {
1178             res_close(result);
1179             return NULL;
1180         }
1181 
1182         /* For handling illegal char in the Intvector */
1183         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1184         len=(uint32_t)(stopstring-string);
1185 
1186         if(len==uprv_strlen(string))
1187         {
1188             intvector_add(result, value, status);
1189             uprv_free(string);
1190             token = peekToken(0, NULL, NULL, NULL, status);
1191         }
1192         else
1193         {
1194             uprv_free(string);
1195             *status=U_INVALID_CHAR_FOUND;
1196         }
1197 
1198         if (U_FAILURE(*status))
1199         {
1200             res_close(result);
1201             return NULL;
1202         }
1203 
1204         /* the comma is optional (even though it is required to prevent the reader from concatenating
1205         consecutive entries) so that a missing comma on the last entry isn't an error */
1206         if (token == TOK_COMMA)
1207         {
1208             getToken(NULL, NULL, NULL, status);
1209         }
1210         readToken = TRUE;
1211     }
1212 
1213     /* not reached */
1214     /* A compiler warning will appear if all paths don't contain a return statement. */
1215 /*    intvector_close(result, status);
1216     *status = U_INTERNAL_PROGRAM_ERROR;
1217     return NULL;*/
1218 }
1219 
1220 static struct SResource *
parseBinary(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1221 parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1222 {
1223     struct SResource *result = NULL;
1224     uint8_t          *value;
1225     char             *string;
1226     char              toConv[3] = {'\0', '\0', '\0'};
1227     uint32_t          count;
1228     uint32_t          i;
1229     uint32_t          line;
1230     char             *stopstring;
1231     uint32_t          len;
1232 
1233     string = getInvariantString(&line, NULL, status);
1234 
1235     if (string == NULL || U_FAILURE(*status))
1236     {
1237         return NULL;
1238     }
1239 
1240     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1241 
1242     if (U_FAILURE(*status))
1243     {
1244         uprv_free(string);
1245         return NULL;
1246     }
1247 
1248     if(isVerbose()){
1249         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1250     }
1251 
1252     count = (uint32_t)uprv_strlen(string);
1253     if (count > 0){
1254         if((count % 2)==0){
1255             value = uprv_malloc(sizeof(uint8_t) * count);
1256 
1257             if (value == NULL)
1258             {
1259                 uprv_free(string);
1260                 *status = U_MEMORY_ALLOCATION_ERROR;
1261                 return NULL;
1262             }
1263 
1264             for (i = 0; i < count; i += 2)
1265             {
1266                 toConv[0] = string[i];
1267                 toConv[1] = string[i + 1];
1268 
1269                 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1270                 len=(uint32_t)(stopstring-toConv);
1271 
1272                 if(len!=uprv_strlen(toConv))
1273                 {
1274                     uprv_free(string);
1275                     *status=U_INVALID_CHAR_FOUND;
1276                     return NULL;
1277                 }
1278             }
1279 
1280             result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status);
1281 
1282             uprv_free(value);
1283         }
1284         else
1285         {
1286             *status = U_INVALID_CHAR_FOUND;
1287             uprv_free(string);
1288             error(line, "Encountered invalid binary string");
1289             return NULL;
1290         }
1291     }
1292     else
1293     {
1294         result = bin_open(bundle, tag, 0, NULL, "",comment,status);
1295         warning(startline, "Encountered empty binary tag");
1296     }
1297     uprv_free(string);
1298 
1299     return result;
1300 }
1301 
1302 static struct SResource *
parseInteger(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1303 parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1304 {
1305     struct SResource *result = NULL;
1306     int32_t           value;
1307     char             *string;
1308     char             *stopstring;
1309     uint32_t          len;
1310 
1311     string = getInvariantString(NULL, NULL, status);
1312 
1313     if (string == NULL || U_FAILURE(*status))
1314     {
1315         return NULL;
1316     }
1317 
1318     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1319 
1320     if (U_FAILURE(*status))
1321     {
1322         uprv_free(string);
1323         return NULL;
1324     }
1325 
1326     if(isVerbose()){
1327         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1328     }
1329 
1330     if (uprv_strlen(string) <= 0)
1331     {
1332         warning(startline, "Encountered empty integer. Default value is 0.");
1333     }
1334 
1335     /* Allow integer support for hexdecimal, octal digit and decimal*/
1336     /* and handle illegal char in the integer*/
1337     value = uprv_strtoul(string, &stopstring, 0);
1338     len=(uint32_t)(stopstring-string);
1339     if(len==uprv_strlen(string))
1340     {
1341         result = int_open(bundle, tag, value, comment, status);
1342     }
1343     else
1344     {
1345         *status=U_INVALID_CHAR_FOUND;
1346     }
1347     uprv_free(string);
1348 
1349     return result;
1350 }
1351 
1352 static struct SResource *
parseImport(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1353 parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1354 {
1355     struct SResource *result;
1356     FileStream       *file;
1357     int32_t           len;
1358     uint8_t          *data;
1359     char             *filename;
1360     uint32_t          line;
1361     char     *fullname = NULL;
1362     int32_t numRead = 0;
1363     filename = getInvariantString(&line, NULL, status);
1364 
1365     if (U_FAILURE(*status))
1366     {
1367         return NULL;
1368     }
1369 
1370     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1371 
1372     if (U_FAILURE(*status))
1373     {
1374         uprv_free(filename);
1375         return NULL;
1376     }
1377 
1378     if(isVerbose()){
1379         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1380     }
1381 
1382     /* Open the input file for reading */
1383     if (inputdir == NULL)
1384     {
1385 #if 1
1386         /*
1387          * Always save file file name, even if there's
1388          * no input directory specified. MIGHT BREAK SOMETHING
1389          */
1390         int32_t filenameLength = uprv_strlen(filename);
1391 
1392         fullname = (char *) uprv_malloc(filenameLength + 1);
1393         uprv_strcpy(fullname, filename);
1394 #endif
1395 
1396         file = T_FileStream_open(filename, "rb");
1397     }
1398     else
1399     {
1400 
1401         int32_t  count     = (int32_t)uprv_strlen(filename);
1402 
1403         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1404         {
1405             fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1406 
1407             /* test for NULL */
1408             if(fullname == NULL)
1409             {
1410                 *status = U_MEMORY_ALLOCATION_ERROR;
1411                 return NULL;
1412             }
1413 
1414             uprv_strcpy(fullname, inputdir);
1415 
1416             fullname[inputdirLength]      = U_FILE_SEP_CHAR;
1417             fullname[inputdirLength + 1] = '\0';
1418 
1419             uprv_strcat(fullname, filename);
1420         }
1421         else
1422         {
1423             fullname = (char *) uprv_malloc(inputdirLength + count + 1);
1424 
1425             /* test for NULL */
1426             if(fullname == NULL)
1427             {
1428                 *status = U_MEMORY_ALLOCATION_ERROR;
1429                 return NULL;
1430             }
1431 
1432             uprv_strcpy(fullname, inputdir);
1433             uprv_strcat(fullname, filename);
1434         }
1435 
1436         file = T_FileStream_open(fullname, "rb");
1437 
1438     }
1439 
1440     if (file == NULL)
1441     {
1442         error(line, "couldn't open input file %s", filename);
1443         *status = U_FILE_ACCESS_ERROR;
1444         return NULL;
1445     }
1446 
1447     len  = T_FileStream_size(file);
1448     data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1449     /* test for NULL */
1450     if(data == NULL)
1451     {
1452         *status = U_MEMORY_ALLOCATION_ERROR;
1453         T_FileStream_close (file);
1454         return NULL;
1455     }
1456 
1457     numRead = T_FileStream_read  (file, data, len);
1458     T_FileStream_close (file);
1459 
1460     result = bin_open(bundle, tag, len, data, fullname, comment, status);
1461 
1462     uprv_free(data);
1463     uprv_free(filename);
1464     uprv_free(fullname);
1465 
1466     return result;
1467 }
1468 
1469 static struct SResource *
parseInclude(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1470 parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1471 {
1472     struct SResource *result;
1473     int32_t           len=0;
1474     char             *filename;
1475     uint32_t          line;
1476     UChar *pTarget     = NULL;
1477 
1478     UCHARBUF *ucbuf;
1479     char     *fullname = NULL;
1480     int32_t  count     = 0;
1481     const char* cp = NULL;
1482     const UChar* uBuffer = NULL;
1483 
1484     filename = getInvariantString(&line, NULL, status);
1485     count     = (int32_t)uprv_strlen(filename);
1486 
1487     if (U_FAILURE(*status))
1488     {
1489         return NULL;
1490     }
1491 
1492     expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1493 
1494     if (U_FAILURE(*status))
1495     {
1496         uprv_free(filename);
1497         return NULL;
1498     }
1499 
1500     if(isVerbose()){
1501         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1502     }
1503 
1504     fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1505     /* test for NULL */
1506     if(fullname == NULL)
1507     {
1508         *status = U_MEMORY_ALLOCATION_ERROR;
1509         uprv_free(filename);
1510         return NULL;
1511     }
1512 
1513     if(inputdir!=NULL){
1514         if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1515         {
1516 
1517             uprv_strcpy(fullname, inputdir);
1518 
1519             fullname[inputdirLength]      = U_FILE_SEP_CHAR;
1520             fullname[inputdirLength + 1] = '\0';
1521 
1522             uprv_strcat(fullname, filename);
1523         }
1524         else
1525         {
1526             uprv_strcpy(fullname, inputdir);
1527             uprv_strcat(fullname, filename);
1528         }
1529     }else{
1530         uprv_strcpy(fullname,filename);
1531     }
1532 
1533     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1534 
1535     if (U_FAILURE(*status)) {
1536         error(line, "couldn't open input file %s\n", filename);
1537         return NULL;
1538     }
1539 
1540     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1541     result = string_open(bundle, tag, uBuffer, len, comment, status);
1542 
1543     uprv_free(pTarget);
1544 
1545     uprv_free(filename);
1546     uprv_free(fullname);
1547 
1548     return result;
1549 }
1550 
1551 
1552 
1553 
1554 
1555 U_STRING_DECL(k_type_string,    "string",    6);
1556 U_STRING_DECL(k_type_binary,    "binary",    6);
1557 U_STRING_DECL(k_type_bin,       "bin",       3);
1558 U_STRING_DECL(k_type_table,     "table",     5);
1559 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
1560 U_STRING_DECL(k_type_int,       "int",       3);
1561 U_STRING_DECL(k_type_integer,   "integer",   7);
1562 U_STRING_DECL(k_type_array,     "array",     5);
1563 U_STRING_DECL(k_type_alias,     "alias",     5);
1564 U_STRING_DECL(k_type_intvector, "intvector", 9);
1565 U_STRING_DECL(k_type_import,    "import",    6);
1566 U_STRING_DECL(k_type_include,   "include",   7);
1567 U_STRING_DECL(k_type_reserved,  "reserved",  8);
1568 
1569 /* Various non-standard processing plugins that create one or more special resources. */
1570 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1571 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
1572 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
1573 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1574 
/*
 * Resource types known to the parser.  The values are used as indexes into
 * gResourceTypes, so both must list the types in the same order.
 */
enum EResourceType
{
    RT_UNKNOWN = 0,             /* no explicit type given (yet) */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker for the type-name search */
};

typedef enum EResourceType EResourceType;
1594 
1595 static struct {
1596     const char *nameChars;   /* only used for debugging */
1597     const UChar *nameUChars;
1598     ParseResourceFunction *parseFunction;
1599 } gResourceTypes[] = {
1600     {"Unknown", NULL, NULL},
1601     {"string", k_type_string, parseString},
1602     {"binary", k_type_binary, parseBinary},
1603     {"table", k_type_table, parseTable},
1604     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1605     {"integer", k_type_integer, parseInteger},
1606     {"array", k_type_array, parseArray},
1607     {"alias", k_type_alias, parseAlias},
1608     {"intvector", k_type_intvector, parseIntVector},
1609     {"import", k_type_import, parseImport},
1610     {"include", k_type_include, parseInclude},
1611     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1612     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1613     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1614     {"process(dependency)", k_type_plugin_dependency, parseDependency},
1615     {"reserved", NULL, NULL}
1616 };
1617 
/*
 * One-time parser setup: initialises the UChar type keywords, the value
 * strings of all lookahead slots and the two collation option flags.
 *
 * omitBinaryCollation - when TRUE, gMakeBinaryCollation is switched off.
 * omitCollationRules  - copied into gOmitCollationRules.
 */
void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
{
    uint32_t slot;

    /* collation options */
    gOmitCollationRules  = omitCollationRules;
    gMakeBinaryCollation = !omitBinaryCollation;

    /* standard resource type keywords */
    U_STRING_INIT(k_type_string, "string", 6);
    U_STRING_INIT(k_type_binary, "binary", 6);
    U_STRING_INIT(k_type_bin, "bin", 3);
    U_STRING_INIT(k_type_table, "table", 5);
    U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
    U_STRING_INIT(k_type_int, "int", 3);
    U_STRING_INIT(k_type_integer, "integer", 7);
    U_STRING_INIT(k_type_array, "array", 5);
    U_STRING_INIT(k_type_alias, "alias", 5);
    U_STRING_INIT(k_type_intvector, "intvector", 9);
    U_STRING_INIT(k_type_import, "import", 6);
    U_STRING_INIT(k_type_reserved, "reserved", 8);
    U_STRING_INIT(k_type_include, "include", 7);

    /* processing plugin keywords */
    U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
    U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
    U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
    U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);

    /* MAX_LOOKAHEAD + 1 slots are initialised */
    for (slot = 0; slot <= MAX_LOOKAHEAD; ++slot)
    {
        ustr_init(&lookahead[slot].value);
    }
}
1648 
isTable(enum EResourceType type)1649 static U_INLINE UBool isTable(enum EResourceType type) {
1650     return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1651 }
1652 
1653 static enum EResourceType
parseResourceType(UErrorCode * status)1654 parseResourceType(UErrorCode *status)
1655 {
1656     struct UString        *tokenValue;
1657     struct UString        comment;
1658     enum   EResourceType  result = RT_UNKNOWN;
1659     uint32_t              line=0;
1660     ustr_init(&comment);
1661     expect(TOK_STRING, &tokenValue, &comment, &line, status);
1662 
1663     if (U_FAILURE(*status))
1664     {
1665         return RT_UNKNOWN;
1666     }
1667 
1668     *status = U_ZERO_ERROR;
1669 
1670     /* Search for normal types */
1671     result=RT_UNKNOWN;
1672     while (++result < RT_RESERVED) {
1673         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1674             break;
1675         }
1676     }
1677     /* Now search for the aliases */
1678     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1679         result = RT_INTEGER;
1680     }
1681     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1682         result = RT_BINARY;
1683     }
1684     else if (result == RT_RESERVED) {
1685         char tokenBuffer[1024];
1686         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1687         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1688         *status = U_INVALID_FORMAT_ERROR;
1689         error(line, "unknown resource type '%s'", tokenBuffer);
1690     }
1691 
1692     return result;
1693 }
1694 
1695 /* parse a non-top-level resource */
1696 static struct SResource *
parseResource(char * tag,const struct UString * comment,UErrorCode * status)1697 parseResource(char *tag, const struct UString *comment, UErrorCode *status)
1698 {
1699     enum   ETokenType      token;
1700     enum   EResourceType  resType = RT_UNKNOWN;
1701     ParseResourceFunction *parseFunction = NULL;
1702     struct UString        *tokenValue;
1703     uint32_t                 startline;
1704     uint32_t                 line;
1705 
1706     token = getToken(&tokenValue, NULL, &startline, status);
1707 
1708     if(isVerbose()){
1709         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1710     }
1711 
1712     /* name . [ ':' type ] '{' resource '}' */
1713     /* This function parses from the colon onwards.  If the colon is present, parse the
1714     type then try to parse a resource of that type.  If there is no explicit type,
1715     work it out using the lookahead tokens. */
1716     switch (token)
1717     {
1718     case TOK_EOF:
1719         *status = U_INVALID_FORMAT_ERROR;
1720         error(startline, "Unexpected EOF encountered");
1721         return NULL;
1722 
1723     case TOK_ERROR:
1724         *status = U_INVALID_FORMAT_ERROR;
1725         return NULL;
1726 
1727     case TOK_COLON:
1728         resType = parseResourceType(status);
1729         expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1730 
1731         if (U_FAILURE(*status))
1732         {
1733             return NULL;
1734         }
1735 
1736         break;
1737 
1738     case TOK_OPEN_BRACE:
1739         break;
1740 
1741     default:
1742         *status = U_INVALID_FORMAT_ERROR;
1743         error(startline, "syntax error while reading a resource, expected '{' or ':'");
1744         return NULL;
1745     }
1746 
1747     if (resType == RT_UNKNOWN)
1748     {
1749         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
1750         We could have any of the following:
1751         { {         => array (nested)
1752         { :/}       => array
1753         { string ,  => string array
1754 
1755         { string {  => table
1756 
1757         { string :/{    => table
1758         { string }      => string
1759         */
1760 
1761         token = peekToken(0, NULL, &line, NULL,status);
1762 
1763         if (U_FAILURE(*status))
1764         {
1765             return NULL;
1766         }
1767 
1768         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1769         {
1770             resType = RT_ARRAY;
1771         }
1772         else if (token == TOK_STRING)
1773         {
1774             token = peekToken(1, NULL, &line, NULL, status);
1775 
1776             if (U_FAILURE(*status))
1777             {
1778                 return NULL;
1779             }
1780 
1781             switch (token)
1782             {
1783             case TOK_COMMA:         resType = RT_ARRAY;  break;
1784             case TOK_OPEN_BRACE:    resType = RT_TABLE;  break;
1785             case TOK_CLOSE_BRACE:   resType = RT_STRING; break;
1786             case TOK_COLON:         resType = RT_TABLE;  break;
1787             default:
1788                 *status = U_INVALID_FORMAT_ERROR;
1789                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1790                 return NULL;
1791             }
1792         }
1793         else
1794         {
1795             *status = U_INVALID_FORMAT_ERROR;
1796             error(line, "Unexpected token after '{'");
1797             return NULL;
1798         }
1799 
1800         /* printf("Type guessed as %s\n", resourceNames[resType]); */
1801     } else if(resType == RT_TABLE_NO_FALLBACK) {
1802         *status = U_INVALID_FORMAT_ERROR;
1803         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1804         return NULL;
1805     }
1806 
1807     /* We should now know what we need to parse next, so call the appropriate parser
1808     function and return. */
1809     parseFunction = gResourceTypes[resType].parseFunction;
1810     if (parseFunction != NULL) {
1811         return parseFunction(tag, startline, comment, status);
1812     }
1813     else {
1814         *status = U_INTERNAL_PROGRAM_ERROR;
1815         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1816     }
1817 
1818     return NULL;
1819 }
1820 
1821 /* parse the top-level resource */
1822 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,UErrorCode * status)1823 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
1824 {
1825     struct UString    *tokenValue;
1826     struct UString    comment;
1827     uint32_t           line;
1828     enum EResourceType bundleType;
1829     enum ETokenType    token;
1830 
1831     initLookahead(buf, status);
1832 
1833     inputdir       = inputDir;
1834     inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0;
1835     outputdir       = outputDir;
1836     outputdirLength = (outputdir != NULL) ? (uint32_t)uprv_strlen(outputdir) : 0;
1837 
1838     ustr_init(&comment);
1839     expect(TOK_STRING, &tokenValue, &comment, NULL, status);
1840 
1841     bundle = bundle_open(&comment, status);
1842 
1843     if (bundle == NULL || U_FAILURE(*status))
1844     {
1845         return NULL;
1846     }
1847 
1848 
1849     bundle_setlocale(bundle, tokenValue->fChars, status);
1850     /* The following code is to make Empty bundle work no matter with :table specifer or not */
1851     token = getToken(NULL, NULL, &line, status);
1852     if(token==TOK_COLON) {
1853         *status=U_ZERO_ERROR;
1854         bundleType=parseResourceType(status);
1855 
1856         if(isTable(bundleType))
1857         {
1858             expect(TOK_OPEN_BRACE, NULL, NULL, &line, status);
1859         }
1860         else
1861         {
1862             *status=U_PARSE_ERROR;
1863             error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
1864         }
1865     }
1866     else
1867     {
1868         /* not a colon */
1869         if(token==TOK_OPEN_BRACE)
1870         {
1871             *status=U_ZERO_ERROR;
1872             bundleType=RT_TABLE;
1873         }
1874         else
1875         {
1876             /* neither colon nor open brace */
1877             *status=U_PARSE_ERROR;
1878             bundleType=RT_UNKNOWN;
1879             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
1880         }
1881     }
1882 
1883     if (U_FAILURE(*status))
1884     {
1885         bundle_close(bundle, status);
1886         return NULL;
1887     }
1888 
1889     if(bundleType==RT_TABLE_NO_FALLBACK) {
1890         /*
1891          * Parse a top-level table with the table(nofallback) declaration.
1892          * This is the same as a regular table, but also sets the
1893          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
1894          */
1895         bundle->noFallback=TRUE;
1896     }
1897     /* top-level tables need not handle special table names like "collations" */
1898     realParseTable(bundle->fRoot, NULL, line, status);
1899 
1900     if(dependencyArray!=NULL){
1901         table_add(bundle->fRoot, dependencyArray, 0, status);
1902         dependencyArray = NULL;
1903     }
1904     if (U_FAILURE(*status))
1905     {
1906         bundle_close(bundle, status);
1907         res_close(dependencyArray);
1908         return NULL;
1909     }
1910 
1911     if (getToken(NULL, NULL, &line, status) != TOK_EOF)
1912     {
1913         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
1914         if(isStrict()){
1915             *status = U_INVALID_FORMAT_ERROR;
1916             return NULL;
1917         }
1918     }
1919 
1920     cleanupLookahead();
1921     ustr_deinit(&comment);
1922     return bundle;
1923 }
1924 
1925