1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File parse.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 02/25/00 weiv Overhaul to write udata
16 * 5/10/01 Ram removed ustdio dependency
17 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
18 *******************************************************************************
19 */
20
21 #include "ucol_imp.h"
22 #include "parse.h"
23 #include "errmsg.h"
24 #include "uhash.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "uinvchar.h"
28 #include "read.h"
29 #include "ustr.h"
30 #include "reslist.h"
31 #include "rbt_pars.h"
32 #include "unicode/ustring.h"
33 #include "unicode/putil.h"
34 #include <stdio.h>
35
/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD 3

/* Code points that are compared against characters read from rule files */
#define CR             0x000D   /* carriage return */
#define LF             0x000A   /* line feed */
#define SPACE          0x0020   /* space */
#define TAB            0x0009   /* horizontal tab */
#define ESCAPE         0x005C   /* backslash, introduces an escape sequence */
#define HASH           0x0023   /* '#', starts a comment that runs to end of line */
#define QUOTE          0x0027   /* apostrophe, toggles quoted mode */
#define ZERO           0x0030   /* digit zero */
#define STARTCOMMAND   0x005B   /* '[', opens a command whose spaces are preserved */
#define ENDCOMMAND     0x005D   /* ']', closes a command */
#define OPENSQBRACKET  0x005B   /* '[', same code point as STARTCOMMAND */
#define CLOSESQBRACKET 0x005D   /* ']', same code point as ENDCOMMAND */
51
/* Function type shared by the type-specific parsers in this file
   (parseString, parseAlias, parseTable, parseArray, ...): each receives the
   resource tag (may be NULL), the line on which the resource started, the
   comment that preceded it, and the in/out error code, and returns the newly
   built resource. */
typedef struct SResource *
ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
54
/* One slot of the circular lookahead buffer: a token together with the data
   that getNextToken produced for it. */
struct Lookahead
{
    enum ETokenType type;       /* kind of token held in this slot */
    struct UString  value;      /* token text filled in by getNextToken */
    struct UString  comment;    /* comment text filled in by getNextToken */
    uint32_t        line;       /* line number reported by getNextToken */
};
62
/* keep in sync with token defines in read.h */
/* Printable names for tokens, indexed by enum ETokenType; used when reporting
   unexpected tokens in error messages. */
const char *tokenNames[TOK_TOKEN_COUNT] =
{
    "string",             /* A string token, such as "MonthNames" */
    "'{'",                 /* An opening brace character */
    "'}'",                 /* A closing brace character */
    "','",                 /* A comma */
    "':'",                 /* A colon */

    "<end of file>",     /* End of the file has been reached successfully */
    "<end of line>"      /* NOTE(review): last entry; presumably lines up with the error token in read.h - confirm */
};
75
/* Just to store "TRUE" */
/* Compared against the value of an "Override" member in addCollation. */
static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};

/* Circular token buffer, index of the current slot, and the input the tokens
   are read from (see initLookahead / getToken / peekToken). */
static struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
static uint32_t         lookaheadPosition;
static UCHARBUF        *buffer;

/* Bundle that every resource opened by the parse functions is created for. */
static struct SRBRoot *bundle;
/* Directory prefixed to file names by parseUCARules and parseTransliterator,
   and its length. */
static const char     *inputdir;
static uint32_t        inputdirLength;
/* Directory prefixed to file names by parseDependency, and its length. */
static const char     *outputdir;
static uint32_t        outputdirLength;

/* When TRUE, addCollation builds a "%%CollationBin" member from "Sequence". */
static UBool gMakeBinaryCollation = TRUE;
/* When TRUE, addCollation does not store "Sequence" and parseUCARules returns
   res_none() instead of reading the rules file. */
static UBool gOmitCollationRules  = FALSE;

/* Forward declaration: the container parsers below recurse through it. */
static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status);
93
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the token in the current slot is returned, the
   current pointer is moved to the next slot, and the slot that held the
   previously returned token is refilled with the next token from the reader by
   calling getNextToken.  The token values are stored in the slots, which means
   that a token value doesn't survive the next call to getToken, i.e.

      UString *value;

      getToken(&value, NULL, NULL, status);
      getToken(NULL,   NULL, NULL, status);       bad - value is now a different string
*/
107 static void
initLookahead(UCHARBUF * buf,UErrorCode * status)108 initLookahead(UCHARBUF *buf, UErrorCode *status)
109 {
110 static uint32_t initTypeStrings = 0;
111 uint32_t i;
112
113 if (!initTypeStrings)
114 {
115 initTypeStrings = 1;
116 }
117
118 lookaheadPosition = 0;
119 buffer = buf;
120
121 resetLineNumber();
122
123 for (i = 0; i < MAX_LOOKAHEAD; i++)
124 {
125 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
126 if (U_FAILURE(*status))
127 {
128 return;
129 }
130 }
131
132 *status = U_ZERO_ERROR;
133 }
134
135 static void
cleanupLookahead()136 cleanupLookahead()
137 {
138 uint32_t i;
139 for (i = 0; i < MAX_LOOKAHEAD; i++)
140 {
141 ustr_deinit(&lookahead[i].value);
142 ustr_deinit(&lookahead[i].comment);
143 }
144
145 }
146
147 static enum ETokenType
getToken(struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)148 getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
149 {
150 enum ETokenType result;
151 uint32_t i;
152
153 result = lookahead[lookaheadPosition].type;
154
155 if (tokenValue != NULL)
156 {
157 *tokenValue = &lookahead[lookaheadPosition].value;
158 }
159
160 if (linenumber != NULL)
161 {
162 *linenumber = lookahead[lookaheadPosition].line;
163 }
164
165 if (comment != NULL)
166 {
167 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
168 }
169
170 i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
171 lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
172 ustr_setlen(&lookahead[i].comment, 0, status);
173 ustr_setlen(&lookahead[i].value, 0, status);
174 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status);
175
176 /* printf("getToken, returning %s\n", tokenNames[result]); */
177
178 return result;
179 }
180
181 static enum ETokenType
peekToken(uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)182 peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
183 {
184 uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
185
186 if (U_FAILURE(*status))
187 {
188 return TOK_ERROR;
189 }
190
191 if (lookaheadCount >= MAX_LOOKAHEAD)
192 {
193 *status = U_INTERNAL_PROGRAM_ERROR;
194 return TOK_ERROR;
195 }
196
197 if (tokenValue != NULL)
198 {
199 *tokenValue = &lookahead[i].value;
200 }
201
202 if (linenumber != NULL)
203 {
204 *linenumber = lookahead[i].line;
205 }
206
207 if(comment != NULL){
208 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status);
209 }
210
211 return lookahead[i].type;
212 }
213
214 static void
expect(enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)215 expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
216 {
217 uint32_t line;
218
219 enum ETokenType token = getToken(tokenValue, comment, &line, status);
220
221 if (linenumber != NULL)
222 {
223 *linenumber = line;
224 }
225
226 if (U_FAILURE(*status))
227 {
228 return;
229 }
230
231 if (token != expectedToken)
232 {
233 *status = U_INVALID_FORMAT_ERROR;
234 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
235 }
236 else
237 {
238 *status = U_ZERO_ERROR;
239 }
240 }
241
/* Read a string token that consists of invariant characters only and return
   it as a newly allocated, NUL-terminated char string (release it with
   uprv_free).
   line    - optional; receives the line of the token (may be NULL)
   comment - optional; receives the token's comment (may be NULL)
   Returns NULL with *status set if the next token is not a string, contains
   non-invariant characters, or memory cannot be allocated. */
static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status)
{
    struct UString *tokenValue;
    char           *result;
    uint32_t        count;
    uint32_t        tokenLine = 0;

    /* keep the line locally so that errors can be reported when line is NULL */
    expect(TOK_STRING, &tokenValue, comment, &tokenLine, status);

    if (line != NULL)
    {
        *line = tokenLine;
    }

    if (U_FAILURE(*status))
    {
        return NULL;
    }

    count = u_strlen(tokenValue->fChars);
    if (!uprv_isInvariantUString(tokenValue->fChars, count))
    {
        *status = U_INVALID_FORMAT_ERROR;
        error(tokenLine, "invariant characters required for table keys, binary data, etc.");
        return NULL;
    }

    result = uprv_malloc(count+1);

    if (result == NULL)
    {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    /* count+1 so that the terminating NUL is converted too */
    u_UCharsToChars(tokenValue->fChars, result, count+1);
    return result;
}
273
274 static struct SResource *
parseUCARules(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)275 parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
276 {
277 struct SResource *result = NULL;
278 struct UString *tokenValue;
279 FileStream *file = NULL;
280 char filename[256] = { '\0' };
281 char cs[128] = { '\0' };
282 uint32_t line;
283 int len=0;
284 UBool quoted = FALSE;
285 UCHARBUF *ucbuf=NULL;
286 UChar32 c = 0;
287 const char* cp = NULL;
288 UChar *pTarget = NULL;
289 UChar *target = NULL;
290 UChar *targetLimit = NULL;
291 int32_t size = 0;
292
293 expect(TOK_STRING, &tokenValue, NULL, &line, status);
294
295 if(isVerbose()){
296 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
297 }
298
299 if (U_FAILURE(*status))
300 {
301 return NULL;
302 }
303 /* make the filename including the directory */
304 if (inputdir != NULL)
305 {
306 uprv_strcat(filename, inputdir);
307
308 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
309 {
310 uprv_strcat(filename, U_FILE_SEP_STRING);
311 }
312 }
313
314 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
315
316 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
317
318 if (U_FAILURE(*status))
319 {
320 return NULL;
321 }
322 uprv_strcat(filename, cs);
323
324 if(gOmitCollationRules) {
325 return res_none();
326 }
327
328 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
329
330 if (U_FAILURE(*status)) {
331 error(line, "An error occured while opening the input file %s\n", filename);
332 return NULL;
333 }
334
335 /* We allocate more space than actually required
336 * since the actual size needed for storing UChars
337 * is not known in UTF-8 byte stream
338 */
339 size = ucbuf_size(ucbuf) + 1;
340 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
341 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
342 target = pTarget;
343 targetLimit = pTarget+size;
344
345 /* read the rules into the buffer */
346 while (target < targetLimit)
347 {
348 c = ucbuf_getc(ucbuf, status);
349 if(c == QUOTE) {
350 quoted = (UBool)!quoted;
351 }
352 /* weiv (06/26/2002): adding the following:
353 * - preserving spaces in commands [...]
354 * - # comments until the end of line
355 */
356 if (c == STARTCOMMAND && !quoted)
357 {
358 /* preserve commands
359 * closing bracket will be handled by the
360 * append at the end of the loop
361 */
362 while(c != ENDCOMMAND) {
363 U_APPEND_CHAR32(c, target,len);
364 c = ucbuf_getc(ucbuf, status);
365 }
366 }
367 else if (c == HASH && !quoted) {
368 /* skip comments */
369 while(c != CR && c != LF) {
370 c = ucbuf_getc(ucbuf, status);
371 }
372 continue;
373 }
374 else if (c == ESCAPE)
375 {
376 c = unescape(ucbuf, status);
377
378 if (c == U_ERR)
379 {
380 uprv_free(pTarget);
381 T_FileStream_close(file);
382 return NULL;
383 }
384 }
385 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
386 {
387 /* ignore spaces carriage returns
388 * and line feed unless in the form \uXXXX
389 */
390 continue;
391 }
392
393 /* Append UChar * after dissembling if c > 0xffff*/
394 if (c != U_EOF)
395 {
396 U_APPEND_CHAR32(c, target,len);
397 }
398 else
399 {
400 break;
401 }
402 }
403
404 /* terminate the string */
405 if(target < targetLimit){
406 *target = 0x0000;
407 }
408
409 result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
410
411
412 ucbuf_close(ucbuf);
413 uprv_free(pTarget);
414 T_FileStream_close(file);
415
416 return result;
417 }
418
419 static struct SResource *
parseTransliterator(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)420 parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
421 {
422 struct SResource *result = NULL;
423 struct UString *tokenValue;
424 FileStream *file = NULL;
425 char filename[256] = { '\0' };
426 char cs[128] = { '\0' };
427 uint32_t line;
428 UCHARBUF *ucbuf=NULL;
429 const char* cp = NULL;
430 UChar *pTarget = NULL;
431 const UChar *pSource = NULL;
432 int32_t size = 0;
433
434 expect(TOK_STRING, &tokenValue, NULL, &line, status);
435
436 if(isVerbose()){
437 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
438 }
439
440 if (U_FAILURE(*status))
441 {
442 return NULL;
443 }
444 /* make the filename including the directory */
445 if (inputdir != NULL)
446 {
447 uprv_strcat(filename, inputdir);
448
449 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
450 {
451 uprv_strcat(filename, U_FILE_SEP_STRING);
452 }
453 }
454
455 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
456
457 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
458
459 if (U_FAILURE(*status))
460 {
461 return NULL;
462 }
463 uprv_strcat(filename, cs);
464
465
466 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
467
468 if (U_FAILURE(*status)) {
469 error(line, "An error occured while opening the input file %s\n", filename);
470 return NULL;
471 }
472
473 /* We allocate more space than actually required
474 * since the actual size needed for storing UChars
475 * is not known in UTF-8 byte stream
476 */
477 pSource = ucbuf_getBuffer(ucbuf, &size, status);
478 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
479 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
480
481 #if !UCONFIG_NO_TRANSLITERATION
482 size = utrans_stripRules(pSource, size, pTarget, status);
483 #else
484 size = 0;
485 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
486 #endif
487 result = string_open(bundle, tag, pTarget, size, NULL, status);
488
489 ucbuf_close(ucbuf);
490 uprv_free(pTarget);
491 T_FileStream_close(file);
492
493 return result;
494 }
495 static struct SResource* dependencyArray = NULL;
496
497 static struct SResource *
parseDependency(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)498 parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
499 {
500 struct SResource *result = NULL;
501 struct SResource *elem = NULL;
502 struct UString *tokenValue;
503 uint32_t line;
504 char filename[256] = { '\0' };
505 char cs[128] = { '\0' };
506
507 expect(TOK_STRING, &tokenValue, NULL, &line, status);
508
509 if(isVerbose()){
510 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
511 }
512
513 if (U_FAILURE(*status))
514 {
515 return NULL;
516 }
517 /* make the filename including the directory */
518 if (outputdir != NULL)
519 {
520 uprv_strcat(filename, outputdir);
521
522 if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR)
523 {
524 uprv_strcat(filename, U_FILE_SEP_STRING);
525 }
526 }
527
528 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
529
530 if (U_FAILURE(*status))
531 {
532 return NULL;
533 }
534 uprv_strcat(filename, cs);
535 if(!T_FileStream_file_exists(filename)){
536 if(isStrict()){
537 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
538 }else{
539 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
540 }
541 }
542 if(dependencyArray==NULL){
543 dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status);
544 }
545 if(tag!=NULL){
546 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
547 }
548 elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
549
550 array_add(dependencyArray, elem, status);
551
552 if (U_FAILURE(*status))
553 {
554 return NULL;
555 }
556 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
557 return result;
558 }
559 static struct SResource *
parseString(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)560 parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
561 {
562 struct UString *tokenValue;
563 struct SResource *result = NULL;
564
565 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
566 {
567 return parseUCARules(tag, startline, status);
568 }*/
569 if(isVerbose()){
570 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
571 }
572 expect(TOK_STRING, &tokenValue, NULL, NULL, status);
573
574 if (U_SUCCESS(*status))
575 {
576 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
577 doesn't survive expect either) */
578
579 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
580 if(U_SUCCESS(*status) && result) {
581 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
582
583 if (U_FAILURE(*status))
584 {
585 res_close(result);
586 return NULL;
587 }
588 }
589 }
590
591 return result;
592 }
593
594 static struct SResource *
parseAlias(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)595 parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
596 {
597 struct UString *tokenValue;
598 struct SResource *result = NULL;
599
600 expect(TOK_STRING, &tokenValue, NULL, NULL, status);
601
602 if(isVerbose()){
603 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
604 }
605
606 if (U_SUCCESS(*status))
607 {
608 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
609 doesn't survive expect either) */
610
611 result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
612
613 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
614
615 if (U_FAILURE(*status))
616 {
617 res_close(result);
618 return NULL;
619 }
620 }
621
622 return result;
623 }
624
625 static struct SResource *
addCollation(struct SResource * result,uint32_t startline,UErrorCode * status)626 addCollation(struct SResource *result, uint32_t startline, UErrorCode *status)
627 {
628 struct SResource *member = NULL;
629 struct UString *tokenValue;
630 struct UString comment;
631 enum ETokenType token;
632 char subtag[1024];
633 UVersionInfo version;
634 UBool override = FALSE;
635 uint32_t line;
636 /* '{' . (name resource)* '}' */
637 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
638
639 for (;;)
640 {
641 ustr_init(&comment);
642 token = getToken(&tokenValue, &comment, &line, status);
643
644 if (token == TOK_CLOSE_BRACE)
645 {
646 return result;
647 }
648
649 if (token != TOK_STRING)
650 {
651 res_close(result);
652 *status = U_INVALID_FORMAT_ERROR;
653
654 if (token == TOK_EOF)
655 {
656 error(startline, "unterminated table");
657 }
658 else
659 {
660 error(line, "Unexpected token %s", tokenNames[token]);
661 }
662
663 return NULL;
664 }
665
666 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
667
668 if (U_FAILURE(*status))
669 {
670 res_close(result);
671 return NULL;
672 }
673
674 member = parseResource(subtag, NULL, status);
675
676 if (U_FAILURE(*status))
677 {
678 res_close(result);
679 return NULL;
680 }
681
682 if (uprv_strcmp(subtag, "Version") == 0)
683 {
684 char ver[40];
685 int32_t length = member->u.fString.fLength;
686
687 if (length >= (int32_t) sizeof(ver))
688 {
689 length = (int32_t) sizeof(ver) - 1;
690 }
691
692 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */
693 u_versionFromString(version, ver);
694
695 table_add(result, member, line, status);
696
697 }
698 else if (uprv_strcmp(subtag, "Override") == 0)
699 {
700 override = FALSE;
701
702 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0)
703 {
704 override = TRUE;
705 }
706 table_add(result, member, line, status);
707
708 }
709 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
710 {
711 /* discard duplicate %%CollationBin if any*/
712 }
713 else if (uprv_strcmp(subtag, "Sequence") == 0)
714 {
715 #if UCONFIG_NO_COLLATION
716 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h");
717 #else
718 /* in order to achieve smaller data files, we can direct genrb */
719 /* to omit collation rules */
720 if(!gOmitCollationRules) {
721 /* first we add the "Sequence", so that we always have rules */
722 table_add(result, member, line, status);
723 }
724 if(gMakeBinaryCollation) {
725 UErrorCode intStatus = U_ZERO_ERROR;
726
727 /* do the collation elements */
728 int32_t len = 0;
729 uint8_t *data = NULL;
730 UCollator *coll = NULL;
731 UParseError parseError;
732 /* add sequence */
733 /*table_add(result, member, line, status);*/
734
735 coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength,
736 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus);
737
738 if (U_SUCCESS(intStatus) && coll != NULL)
739 {
740 len = ucol_cloneBinary(coll, NULL, 0, &intStatus);
741 data = (uint8_t *)uprv_malloc(len);
742 intStatus = U_ZERO_ERROR;
743 len = ucol_cloneBinary(coll, data, len, &intStatus);
744 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/
745
746 /* tailoring rules version */
747 /* This is wrong! */
748 /*coll->dataInfo.dataVersion[1] = version[0];*/
749 /* Copy tailoring version. Builder version already */
750 /* set in ucol_openRules */
751 ((UCATableHeader *)data)->version[1] = version[0];
752 ((UCATableHeader *)data)->version[2] = version[1];
753 ((UCATableHeader *)data)->version[3] = version[2];
754
755 if (U_SUCCESS(intStatus) && data != NULL)
756 {
757 member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status);
758 /*table_add(bundle->fRoot, member, line, status);*/
759 table_add(result, member, line, status);
760 uprv_free(data);
761 }
762 else
763 {
764 warning(line, "could not obtain rules from collator");
765 if(isStrict()){
766 *status = U_INVALID_FORMAT_ERROR;
767 return NULL;
768 }
769 }
770
771 ucol_close(coll);
772 }
773 else
774 {
775 warning(line, "%%Collation could not be constructed from CollationElements - check context!");
776 if(isStrict()){
777 *status = intStatus;
778 return NULL;
779 }
780 }
781 } else {
782 if(isVerbose()) {
783 printf("Not building Collation binary\n");
784 }
785 }
786 #endif
787 }
788
789 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
790
791 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
792
793 if (U_FAILURE(*status))
794 {
795 res_close(result);
796 return NULL;
797 }
798 }
799
800 /* not reached */
801 /* A compiler warning will appear if all paths don't contain a return statement. */
802 /* *status = U_INTERNAL_PROGRAM_ERROR;
803 return NULL;*/
804 }
805
806 static struct SResource *
parseCollationElements(char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)807 parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
808 {
809 struct SResource *result = NULL;
810 struct SResource *member = NULL;
811 struct SResource *collationRes = NULL;
812 struct UString *tokenValue;
813 struct UString comment;
814 enum ETokenType token;
815 char subtag[1024], typeKeyword[1024];
816 uint32_t line;
817
818 result = table_open(bundle, tag, NULL, status);
819
820 if (result == NULL || U_FAILURE(*status))
821 {
822 return NULL;
823 }
824 if(isVerbose()){
825 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
826 }
827 if(!newCollation) {
828 return addCollation(result, startline, status);
829 }
830 else {
831 for(;;) {
832 ustr_init(&comment);
833 token = getToken(&tokenValue, &comment, &line, status);
834
835 if (token == TOK_CLOSE_BRACE)
836 {
837 return result;
838 }
839
840 if (token != TOK_STRING)
841 {
842 res_close(result);
843 *status = U_INVALID_FORMAT_ERROR;
844
845 if (token == TOK_EOF)
846 {
847 error(startline, "unterminated table");
848 }
849 else
850 {
851 error(line, "Unexpected token %s", tokenNames[token]);
852 }
853
854 return NULL;
855 }
856
857 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
858
859 if (U_FAILURE(*status))
860 {
861 res_close(result);
862 return NULL;
863 }
864
865 if (uprv_strcmp(subtag, "default") == 0)
866 {
867 member = parseResource(subtag, NULL, status);
868
869 if (U_FAILURE(*status))
870 {
871 res_close(result);
872 return NULL;
873 }
874
875 table_add(result, member, line, status);
876 }
877 else
878 {
879 token = peekToken(0, &tokenValue, &line, &comment, status);
880 /* this probably needs to be refactored or recursively use the parser */
881 /* first we assume that our collation table won't have the explicit type */
882 /* then, we cannot handle aliases */
883 if(token == TOK_OPEN_BRACE) {
884 token = getToken(&tokenValue, &comment, &line, status);
885 collationRes = table_open(bundle, subtag, NULL, status);
886 table_add(result, addCollation(collationRes, startline, status), startline, status);
887 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
888 /* we could have a table too */
889 token = peekToken(1, &tokenValue, &line, &comment, status);
890 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
891 if(uprv_strcmp(typeKeyword, "alias") == 0) {
892 member = parseResource(subtag, NULL, status);
893
894 if (U_FAILURE(*status))
895 {
896 res_close(result);
897 return NULL;
898 }
899
900 table_add(result, member, line, status);
901 } else {
902 res_close(result);
903 *status = U_INVALID_FORMAT_ERROR;
904 return NULL;
905 }
906 } else {
907 res_close(result);
908 *status = U_INVALID_FORMAT_ERROR;
909 return NULL;
910 }
911 }
912
913 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
914
915 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
916
917 if (U_FAILURE(*status))
918 {
919 res_close(result);
920 return NULL;
921 }
922 }
923 }
924 }
925
926 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
927 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
928 static struct SResource *
realParseTable(struct SResource * table,char * tag,uint32_t startline,UErrorCode * status)929 realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status)
930 {
931 struct SResource *member = NULL;
932 struct UString *tokenValue=NULL;
933 struct UString comment;
934 enum ETokenType token;
935 char subtag[1024];
936 uint32_t line;
937 UBool readToken = FALSE;
938
939 /* '{' . (name resource)* '}' */
940 if(isVerbose()){
941 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
942 }
943 for (;;)
944 {
945 ustr_init(&comment);
946 token = getToken(&tokenValue, &comment, &line, status);
947
948 if (token == TOK_CLOSE_BRACE)
949 {
950 if (!readToken) {
951 warning(startline, "Encountered empty table");
952 }
953 return table;
954 }
955
956 if (token != TOK_STRING)
957 {
958 *status = U_INVALID_FORMAT_ERROR;
959
960 if (token == TOK_EOF)
961 {
962 error(startline, "unterminated table");
963 }
964 else
965 {
966 error(line, "unexpected token %s", tokenNames[token]);
967 }
968
969 return NULL;
970 }
971
972 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
973 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
974 } else {
975 *status = U_INVALID_FORMAT_ERROR;
976 error(line, "invariant characters required for table keys");
977 return NULL;
978 }
979
980 if (U_FAILURE(*status))
981 {
982 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
983 return NULL;
984 }
985
986 member = parseResource(subtag, &comment, status);
987
988 if (member == NULL || U_FAILURE(*status))
989 {
990 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
991 return NULL;
992 }
993
994 table_add(table, member, line, status);
995
996 if (U_FAILURE(*status))
997 {
998 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
999 return NULL;
1000 }
1001 readToken = TRUE;
1002 ustr_deinit(&comment);
1003 }
1004
1005 /* not reached */
1006 /* A compiler warning will appear if all paths don't contain a return statement. */
1007 /* *status = U_INTERNAL_PROGRAM_ERROR;
1008 return NULL;*/
1009 }
1010
1011 static struct SResource *
parseTable(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1012 parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1013 {
1014 struct SResource *result;
1015
1016 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1017 {
1018 return parseCollationElements(tag, startline, FALSE, status);
1019 }
1020 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1021 {
1022 return parseCollationElements(tag, startline, TRUE, status);
1023 }
1024 if(isVerbose()){
1025 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1026 }
1027
1028 result = table_open(bundle, tag, comment, status);
1029
1030 if (result == NULL || U_FAILURE(*status))
1031 {
1032 return NULL;
1033 }
1034
1035 return realParseTable(result, tag, startline, status);
1036 }
1037
1038 static struct SResource *
parseArray(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1039 parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1040 {
1041 struct SResource *result = NULL;
1042 struct SResource *member = NULL;
1043 struct UString *tokenValue;
1044 struct UString memberComments;
1045 enum ETokenType token;
1046 UBool readToken = FALSE;
1047
1048 result = array_open(bundle, tag, comment, status);
1049
1050 if (result == NULL || U_FAILURE(*status))
1051 {
1052 return NULL;
1053 }
1054 if(isVerbose()){
1055 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1056 }
1057
1058 ustr_init(&memberComments);
1059
1060 /* '{' . resource [','] '}' */
1061 for (;;)
1062 {
1063 /* reset length */
1064 ustr_setlen(&memberComments, 0, status);
1065
1066 /* check for end of array, but don't consume next token unless it really is the end */
1067 token = peekToken(0, &tokenValue, NULL, &memberComments, status);
1068
1069
1070 if (token == TOK_CLOSE_BRACE)
1071 {
1072 getToken(NULL, NULL, NULL, status);
1073 if (!readToken) {
1074 warning(startline, "Encountered empty array");
1075 }
1076 break;
1077 }
1078
1079 if (token == TOK_EOF)
1080 {
1081 res_close(result);
1082 *status = U_INVALID_FORMAT_ERROR;
1083 error(startline, "unterminated array");
1084 return NULL;
1085 }
1086
1087 /* string arrays are a special case */
1088 if (token == TOK_STRING)
1089 {
1090 getToken(&tokenValue, &memberComments, NULL, status);
1091 member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1092 }
1093 else
1094 {
1095 member = parseResource(NULL, &memberComments, status);
1096 }
1097
1098 if (member == NULL || U_FAILURE(*status))
1099 {
1100 res_close(result);
1101 return NULL;
1102 }
1103
1104 array_add(result, member, status);
1105
1106 if (U_FAILURE(*status))
1107 {
1108 res_close(result);
1109 return NULL;
1110 }
1111
1112 /* eat optional comma if present */
1113 token = peekToken(0, NULL, NULL, NULL, status);
1114
1115 if (token == TOK_COMMA)
1116 {
1117 getToken(NULL, NULL, NULL, status);
1118 }
1119
1120 if (U_FAILURE(*status))
1121 {
1122 res_close(result);
1123 return NULL;
1124 }
1125 readToken = TRUE;
1126 }
1127
1128 ustr_deinit(&memberComments);
1129 return result;
1130 }
1131
1132 static struct SResource *
parseIntVector(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1133 parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1134 {
1135 struct SResource *result = NULL;
1136 enum ETokenType token;
1137 char *string;
1138 int32_t value;
1139 UBool readToken = FALSE;
1140 char *stopstring;
1141 uint32_t len;
1142 struct UString memberComments;
1143
1144 result = intvector_open(bundle, tag, comment, status);
1145
1146 if (result == NULL || U_FAILURE(*status))
1147 {
1148 return NULL;
1149 }
1150
1151 if(isVerbose()){
1152 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1153 }
1154 ustr_init(&memberComments);
1155 /* '{' . string [','] '}' */
1156 for (;;)
1157 {
1158 ustr_setlen(&memberComments, 0, status);
1159
1160 /* check for end of array, but don't consume next token unless it really is the end */
1161 token = peekToken(0, NULL, NULL,&memberComments, status);
1162
1163 if (token == TOK_CLOSE_BRACE)
1164 {
1165 /* it's the end, consume the close brace */
1166 getToken(NULL, NULL, NULL, status);
1167 if (!readToken) {
1168 warning(startline, "Encountered empty int vector");
1169 }
1170 ustr_deinit(&memberComments);
1171 return result;
1172 }
1173
1174 string = getInvariantString(NULL, NULL, status);
1175
1176 if (U_FAILURE(*status))
1177 {
1178 res_close(result);
1179 return NULL;
1180 }
1181
1182 /* For handling illegal char in the Intvector */
1183 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1184 len=(uint32_t)(stopstring-string);
1185
1186 if(len==uprv_strlen(string))
1187 {
1188 intvector_add(result, value, status);
1189 uprv_free(string);
1190 token = peekToken(0, NULL, NULL, NULL, status);
1191 }
1192 else
1193 {
1194 uprv_free(string);
1195 *status=U_INVALID_CHAR_FOUND;
1196 }
1197
1198 if (U_FAILURE(*status))
1199 {
1200 res_close(result);
1201 return NULL;
1202 }
1203
1204 /* the comma is optional (even though it is required to prevent the reader from concatenating
1205 consecutive entries) so that a missing comma on the last entry isn't an error */
1206 if (token == TOK_COMMA)
1207 {
1208 getToken(NULL, NULL, NULL, status);
1209 }
1210 readToken = TRUE;
1211 }
1212
1213 /* not reached */
1214 /* A compiler warning will appear if all paths don't contain a return statement. */
1215 /* intvector_close(result, status);
1216 *status = U_INTERNAL_PROGRAM_ERROR;
1217 return NULL;*/
1218 }
1219
1220 static struct SResource *
parseBinary(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1221 parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1222 {
1223 struct SResource *result = NULL;
1224 uint8_t *value;
1225 char *string;
1226 char toConv[3] = {'\0', '\0', '\0'};
1227 uint32_t count;
1228 uint32_t i;
1229 uint32_t line;
1230 char *stopstring;
1231 uint32_t len;
1232
1233 string = getInvariantString(&line, NULL, status);
1234
1235 if (string == NULL || U_FAILURE(*status))
1236 {
1237 return NULL;
1238 }
1239
1240 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1241
1242 if (U_FAILURE(*status))
1243 {
1244 uprv_free(string);
1245 return NULL;
1246 }
1247
1248 if(isVerbose()){
1249 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1250 }
1251
1252 count = (uint32_t)uprv_strlen(string);
1253 if (count > 0){
1254 if((count % 2)==0){
1255 value = uprv_malloc(sizeof(uint8_t) * count);
1256
1257 if (value == NULL)
1258 {
1259 uprv_free(string);
1260 *status = U_MEMORY_ALLOCATION_ERROR;
1261 return NULL;
1262 }
1263
1264 for (i = 0; i < count; i += 2)
1265 {
1266 toConv[0] = string[i];
1267 toConv[1] = string[i + 1];
1268
1269 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1270 len=(uint32_t)(stopstring-toConv);
1271
1272 if(len!=uprv_strlen(toConv))
1273 {
1274 uprv_free(string);
1275 *status=U_INVALID_CHAR_FOUND;
1276 return NULL;
1277 }
1278 }
1279
1280 result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status);
1281
1282 uprv_free(value);
1283 }
1284 else
1285 {
1286 *status = U_INVALID_CHAR_FOUND;
1287 uprv_free(string);
1288 error(line, "Encountered invalid binary string");
1289 return NULL;
1290 }
1291 }
1292 else
1293 {
1294 result = bin_open(bundle, tag, 0, NULL, "",comment,status);
1295 warning(startline, "Encountered empty binary tag");
1296 }
1297 uprv_free(string);
1298
1299 return result;
1300 }
1301
1302 static struct SResource *
parseInteger(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1303 parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1304 {
1305 struct SResource *result = NULL;
1306 int32_t value;
1307 char *string;
1308 char *stopstring;
1309 uint32_t len;
1310
1311 string = getInvariantString(NULL, NULL, status);
1312
1313 if (string == NULL || U_FAILURE(*status))
1314 {
1315 return NULL;
1316 }
1317
1318 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1319
1320 if (U_FAILURE(*status))
1321 {
1322 uprv_free(string);
1323 return NULL;
1324 }
1325
1326 if(isVerbose()){
1327 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1328 }
1329
1330 if (uprv_strlen(string) <= 0)
1331 {
1332 warning(startline, "Encountered empty integer. Default value is 0.");
1333 }
1334
1335 /* Allow integer support for hexdecimal, octal digit and decimal*/
1336 /* and handle illegal char in the integer*/
1337 value = uprv_strtoul(string, &stopstring, 0);
1338 len=(uint32_t)(stopstring-string);
1339 if(len==uprv_strlen(string))
1340 {
1341 result = int_open(bundle, tag, value, comment, status);
1342 }
1343 else
1344 {
1345 *status=U_INVALID_CHAR_FOUND;
1346 }
1347 uprv_free(string);
1348
1349 return result;
1350 }
1351
1352 static struct SResource *
parseImport(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1353 parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1354 {
1355 struct SResource *result;
1356 FileStream *file;
1357 int32_t len;
1358 uint8_t *data;
1359 char *filename;
1360 uint32_t line;
1361 char *fullname = NULL;
1362 int32_t numRead = 0;
1363 filename = getInvariantString(&line, NULL, status);
1364
1365 if (U_FAILURE(*status))
1366 {
1367 return NULL;
1368 }
1369
1370 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1371
1372 if (U_FAILURE(*status))
1373 {
1374 uprv_free(filename);
1375 return NULL;
1376 }
1377
1378 if(isVerbose()){
1379 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1380 }
1381
1382 /* Open the input file for reading */
1383 if (inputdir == NULL)
1384 {
1385 #if 1
1386 /*
1387 * Always save file file name, even if there's
1388 * no input directory specified. MIGHT BREAK SOMETHING
1389 */
1390 int32_t filenameLength = uprv_strlen(filename);
1391
1392 fullname = (char *) uprv_malloc(filenameLength + 1);
1393 uprv_strcpy(fullname, filename);
1394 #endif
1395
1396 file = T_FileStream_open(filename, "rb");
1397 }
1398 else
1399 {
1400
1401 int32_t count = (int32_t)uprv_strlen(filename);
1402
1403 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1404 {
1405 fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1406
1407 /* test for NULL */
1408 if(fullname == NULL)
1409 {
1410 *status = U_MEMORY_ALLOCATION_ERROR;
1411 return NULL;
1412 }
1413
1414 uprv_strcpy(fullname, inputdir);
1415
1416 fullname[inputdirLength] = U_FILE_SEP_CHAR;
1417 fullname[inputdirLength + 1] = '\0';
1418
1419 uprv_strcat(fullname, filename);
1420 }
1421 else
1422 {
1423 fullname = (char *) uprv_malloc(inputdirLength + count + 1);
1424
1425 /* test for NULL */
1426 if(fullname == NULL)
1427 {
1428 *status = U_MEMORY_ALLOCATION_ERROR;
1429 return NULL;
1430 }
1431
1432 uprv_strcpy(fullname, inputdir);
1433 uprv_strcat(fullname, filename);
1434 }
1435
1436 file = T_FileStream_open(fullname, "rb");
1437
1438 }
1439
1440 if (file == NULL)
1441 {
1442 error(line, "couldn't open input file %s", filename);
1443 *status = U_FILE_ACCESS_ERROR;
1444 return NULL;
1445 }
1446
1447 len = T_FileStream_size(file);
1448 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t));
1449 /* test for NULL */
1450 if(data == NULL)
1451 {
1452 *status = U_MEMORY_ALLOCATION_ERROR;
1453 T_FileStream_close (file);
1454 return NULL;
1455 }
1456
1457 numRead = T_FileStream_read (file, data, len);
1458 T_FileStream_close (file);
1459
1460 result = bin_open(bundle, tag, len, data, fullname, comment, status);
1461
1462 uprv_free(data);
1463 uprv_free(filename);
1464 uprv_free(fullname);
1465
1466 return result;
1467 }
1468
1469 static struct SResource *
parseInclude(char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1470 parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1471 {
1472 struct SResource *result;
1473 int32_t len=0;
1474 char *filename;
1475 uint32_t line;
1476 UChar *pTarget = NULL;
1477
1478 UCHARBUF *ucbuf;
1479 char *fullname = NULL;
1480 int32_t count = 0;
1481 const char* cp = NULL;
1482 const UChar* uBuffer = NULL;
1483
1484 filename = getInvariantString(&line, NULL, status);
1485 count = (int32_t)uprv_strlen(filename);
1486
1487 if (U_FAILURE(*status))
1488 {
1489 return NULL;
1490 }
1491
1492 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1493
1494 if (U_FAILURE(*status))
1495 {
1496 uprv_free(filename);
1497 return NULL;
1498 }
1499
1500 if(isVerbose()){
1501 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1502 }
1503
1504 fullname = (char *) uprv_malloc(inputdirLength + count + 2);
1505 /* test for NULL */
1506 if(fullname == NULL)
1507 {
1508 *status = U_MEMORY_ALLOCATION_ERROR;
1509 uprv_free(filename);
1510 return NULL;
1511 }
1512
1513 if(inputdir!=NULL){
1514 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR)
1515 {
1516
1517 uprv_strcpy(fullname, inputdir);
1518
1519 fullname[inputdirLength] = U_FILE_SEP_CHAR;
1520 fullname[inputdirLength + 1] = '\0';
1521
1522 uprv_strcat(fullname, filename);
1523 }
1524 else
1525 {
1526 uprv_strcpy(fullname, inputdir);
1527 uprv_strcat(fullname, filename);
1528 }
1529 }else{
1530 uprv_strcpy(fullname,filename);
1531 }
1532
1533 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1534
1535 if (U_FAILURE(*status)) {
1536 error(line, "couldn't open input file %s\n", filename);
1537 return NULL;
1538 }
1539
1540 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1541 result = string_open(bundle, tag, uBuffer, len, comment, status);
1542
1543 uprv_free(pTarget);
1544
1545 uprv_free(filename);
1546 uprv_free(fullname);
1547
1548 return result;
1549 }
1550
1551
1552
1553
1554
1555 U_STRING_DECL(k_type_string, "string", 6);
1556 U_STRING_DECL(k_type_binary, "binary", 6);
1557 U_STRING_DECL(k_type_bin, "bin", 3);
1558 U_STRING_DECL(k_type_table, "table", 5);
1559 U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
1560 U_STRING_DECL(k_type_int, "int", 3);
1561 U_STRING_DECL(k_type_integer, "integer", 7);
1562 U_STRING_DECL(k_type_array, "array", 5);
1563 U_STRING_DECL(k_type_alias, "alias", 5);
1564 U_STRING_DECL(k_type_intvector, "intvector", 9);
1565 U_STRING_DECL(k_type_import, "import", 6);
1566 U_STRING_DECL(k_type_include, "include", 7);
1567 U_STRING_DECL(k_type_reserved, "reserved", 8);
1568
1569 /* Various non-standard processing plugins that create one or more special resources. */
1570 U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
1571 U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
1572 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
1573 U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1574
/*
 * Resource type identifiers.
 *
 * The numeric values double as indexes into gResourceTypes[], so the order
 * of the enumerators and the order of that table's rows must stay in step.
 */
typedef enum EResourceType
{
    RT_UNKNOWN = 0,             /* no explicit type; parseResource() infers one */
    RT_STRING,
    RT_BINARY,
    RT_TABLE,
    RT_TABLE_NO_FALLBACK,       /* rejected by parseResource(); accepted by parse() */
    RT_INTEGER,
    RT_ARRAY,
    RT_ALIAS,
    RT_INTVECTOR,
    RT_IMPORT,
    RT_INCLUDE,
    RT_PROCESS_UCA_RULES,
    RT_PROCESS_COLLATION,
    RT_PROCESS_TRANSLITERATOR,
    RT_PROCESS_DEPENDENCY,
    RT_RESERVED                 /* end marker: upper bound of the name search */
} EResourceType;
1594
1595 static struct {
1596 const char *nameChars; /* only used for debugging */
1597 const UChar *nameUChars;
1598 ParseResourceFunction *parseFunction;
1599 } gResourceTypes[] = {
1600 {"Unknown", NULL, NULL},
1601 {"string", k_type_string, parseString},
1602 {"binary", k_type_binary, parseBinary},
1603 {"table", k_type_table, parseTable},
1604 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1605 {"integer", k_type_integer, parseInteger},
1606 {"array", k_type_array, parseArray},
1607 {"alias", k_type_alias, parseAlias},
1608 {"intvector", k_type_intvector, parseIntVector},
1609 {"import", k_type_import, parseImport},
1610 {"include", k_type_include, parseInclude},
1611 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1612 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1613 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1614 {"process(dependency)", k_type_plugin_dependency, parseDependency},
1615 {"reserved", NULL, NULL}
1616 };
1617
/*
 * Set up the parser's file-level state: record the two collation options,
 * fill in the UChar type-name strings and initialise the value string of
 * every lookahead slot.
 *
 * omitBinaryCollation  stored inverted in gMakeBinaryCollation
 * omitCollationRules   stored as-is in gOmitCollationRules
 */
void initParser(UBool omitBinaryCollation, UBool omitCollationRules)
{
    uint32_t slot;

    gMakeBinaryCollation = !omitBinaryCollation;
    gOmitCollationRules  = omitCollationRules;

    /* canonical type names */
    U_STRING_INIT(k_type_string,            "string",            6);
    U_STRING_INIT(k_type_binary,            "binary",            6);
    U_STRING_INIT(k_type_table,             "table",             5);
    U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
    U_STRING_INIT(k_type_integer,           "integer",           7);
    U_STRING_INIT(k_type_array,             "array",             5);
    U_STRING_INIT(k_type_alias,             "alias",             5);
    U_STRING_INIT(k_type_intvector,         "intvector",         9);
    U_STRING_INIT(k_type_import,            "import",            6);
    U_STRING_INIT(k_type_include,           "include",           7);
    U_STRING_INIT(k_type_reserved,          "reserved",          8);

    /* short aliases */
    U_STRING_INIT(k_type_bin,               "bin",               3);
    U_STRING_INIT(k_type_int,               "int",               3);

    /* processing plugins */
    U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",      18);
    U_STRING_INIT(k_type_plugin_collation,      "process(collation)",      18);
    U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
    U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",     19);

    /* slots 0 .. MAX_LOOKAHEAD inclusive */
    for (slot = 0; slot <= MAX_LOOKAHEAD; ++slot)
    {
        ustr_init(&lookahead[slot].value);
    }
}
1648
isTable(enum EResourceType type)1649 static U_INLINE UBool isTable(enum EResourceType type) {
1650 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK);
1651 }
1652
1653 static enum EResourceType
parseResourceType(UErrorCode * status)1654 parseResourceType(UErrorCode *status)
1655 {
1656 struct UString *tokenValue;
1657 struct UString comment;
1658 enum EResourceType result = RT_UNKNOWN;
1659 uint32_t line=0;
1660 ustr_init(&comment);
1661 expect(TOK_STRING, &tokenValue, &comment, &line, status);
1662
1663 if (U_FAILURE(*status))
1664 {
1665 return RT_UNKNOWN;
1666 }
1667
1668 *status = U_ZERO_ERROR;
1669
1670 /* Search for normal types */
1671 result=RT_UNKNOWN;
1672 while (++result < RT_RESERVED) {
1673 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1674 break;
1675 }
1676 }
1677 /* Now search for the aliases */
1678 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1679 result = RT_INTEGER;
1680 }
1681 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1682 result = RT_BINARY;
1683 }
1684 else if (result == RT_RESERVED) {
1685 char tokenBuffer[1024];
1686 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1687 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1688 *status = U_INVALID_FORMAT_ERROR;
1689 error(line, "unknown resource type '%s'", tokenBuffer);
1690 }
1691
1692 return result;
1693 }
1694
1695 /* parse a non-top-level resource */
1696 static struct SResource *
parseResource(char * tag,const struct UString * comment,UErrorCode * status)1697 parseResource(char *tag, const struct UString *comment, UErrorCode *status)
1698 {
1699 enum ETokenType token;
1700 enum EResourceType resType = RT_UNKNOWN;
1701 ParseResourceFunction *parseFunction = NULL;
1702 struct UString *tokenValue;
1703 uint32_t startline;
1704 uint32_t line;
1705
1706 token = getToken(&tokenValue, NULL, &startline, status);
1707
1708 if(isVerbose()){
1709 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1710 }
1711
1712 /* name . [ ':' type ] '{' resource '}' */
1713 /* This function parses from the colon onwards. If the colon is present, parse the
1714 type then try to parse a resource of that type. If there is no explicit type,
1715 work it out using the lookahead tokens. */
1716 switch (token)
1717 {
1718 case TOK_EOF:
1719 *status = U_INVALID_FORMAT_ERROR;
1720 error(startline, "Unexpected EOF encountered");
1721 return NULL;
1722
1723 case TOK_ERROR:
1724 *status = U_INVALID_FORMAT_ERROR;
1725 return NULL;
1726
1727 case TOK_COLON:
1728 resType = parseResourceType(status);
1729 expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1730
1731 if (U_FAILURE(*status))
1732 {
1733 return NULL;
1734 }
1735
1736 break;
1737
1738 case TOK_OPEN_BRACE:
1739 break;
1740
1741 default:
1742 *status = U_INVALID_FORMAT_ERROR;
1743 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1744 return NULL;
1745 }
1746
1747 if (resType == RT_UNKNOWN)
1748 {
1749 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1750 We could have any of the following:
1751 { { => array (nested)
1752 { :/} => array
1753 { string , => string array
1754
1755 { string { => table
1756
1757 { string :/{ => table
1758 { string } => string
1759 */
1760
1761 token = peekToken(0, NULL, &line, NULL,status);
1762
1763 if (U_FAILURE(*status))
1764 {
1765 return NULL;
1766 }
1767
1768 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1769 {
1770 resType = RT_ARRAY;
1771 }
1772 else if (token == TOK_STRING)
1773 {
1774 token = peekToken(1, NULL, &line, NULL, status);
1775
1776 if (U_FAILURE(*status))
1777 {
1778 return NULL;
1779 }
1780
1781 switch (token)
1782 {
1783 case TOK_COMMA: resType = RT_ARRAY; break;
1784 case TOK_OPEN_BRACE: resType = RT_TABLE; break;
1785 case TOK_CLOSE_BRACE: resType = RT_STRING; break;
1786 case TOK_COLON: resType = RT_TABLE; break;
1787 default:
1788 *status = U_INVALID_FORMAT_ERROR;
1789 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1790 return NULL;
1791 }
1792 }
1793 else
1794 {
1795 *status = U_INVALID_FORMAT_ERROR;
1796 error(line, "Unexpected token after '{'");
1797 return NULL;
1798 }
1799
1800 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1801 } else if(resType == RT_TABLE_NO_FALLBACK) {
1802 *status = U_INVALID_FORMAT_ERROR;
1803 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1804 return NULL;
1805 }
1806
1807 /* We should now know what we need to parse next, so call the appropriate parser
1808 function and return. */
1809 parseFunction = gResourceTypes[resType].parseFunction;
1810 if (parseFunction != NULL) {
1811 return parseFunction(tag, startline, comment, status);
1812 }
1813 else {
1814 *status = U_INTERNAL_PROGRAM_ERROR;
1815 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1816 }
1817
1818 return NULL;
1819 }
1820
1821 /* parse the top-level resource */
1822 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,UErrorCode * status)1823 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status)
1824 {
1825 struct UString *tokenValue;
1826 struct UString comment;
1827 uint32_t line;
1828 enum EResourceType bundleType;
1829 enum ETokenType token;
1830
1831 initLookahead(buf, status);
1832
1833 inputdir = inputDir;
1834 inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0;
1835 outputdir = outputDir;
1836 outputdirLength = (outputdir != NULL) ? (uint32_t)uprv_strlen(outputdir) : 0;
1837
1838 ustr_init(&comment);
1839 expect(TOK_STRING, &tokenValue, &comment, NULL, status);
1840
1841 bundle = bundle_open(&comment, status);
1842
1843 if (bundle == NULL || U_FAILURE(*status))
1844 {
1845 return NULL;
1846 }
1847
1848
1849 bundle_setlocale(bundle, tokenValue->fChars, status);
1850 /* The following code is to make Empty bundle work no matter with :table specifer or not */
1851 token = getToken(NULL, NULL, &line, status);
1852 if(token==TOK_COLON) {
1853 *status=U_ZERO_ERROR;
1854 bundleType=parseResourceType(status);
1855
1856 if(isTable(bundleType))
1857 {
1858 expect(TOK_OPEN_BRACE, NULL, NULL, &line, status);
1859 }
1860 else
1861 {
1862 *status=U_PARSE_ERROR;
1863 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
1864 }
1865 }
1866 else
1867 {
1868 /* not a colon */
1869 if(token==TOK_OPEN_BRACE)
1870 {
1871 *status=U_ZERO_ERROR;
1872 bundleType=RT_TABLE;
1873 }
1874 else
1875 {
1876 /* neither colon nor open brace */
1877 *status=U_PARSE_ERROR;
1878 bundleType=RT_UNKNOWN;
1879 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
1880 }
1881 }
1882
1883 if (U_FAILURE(*status))
1884 {
1885 bundle_close(bundle, status);
1886 return NULL;
1887 }
1888
1889 if(bundleType==RT_TABLE_NO_FALLBACK) {
1890 /*
1891 * Parse a top-level table with the table(nofallback) declaration.
1892 * This is the same as a regular table, but also sets the
1893 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
1894 */
1895 bundle->noFallback=TRUE;
1896 }
1897 /* top-level tables need not handle special table names like "collations" */
1898 realParseTable(bundle->fRoot, NULL, line, status);
1899
1900 if(dependencyArray!=NULL){
1901 table_add(bundle->fRoot, dependencyArray, 0, status);
1902 dependencyArray = NULL;
1903 }
1904 if (U_FAILURE(*status))
1905 {
1906 bundle_close(bundle, status);
1907 res_close(dependencyArray);
1908 return NULL;
1909 }
1910
1911 if (getToken(NULL, NULL, &line, status) != TOK_EOF)
1912 {
1913 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
1914 if(isStrict()){
1915 *status = U_INVALID_FORMAT_ERROR;
1916 return NULL;
1917 }
1918 }
1919
1920 cleanupLookahead();
1921 ustr_deinit(&comment);
1922 return bundle;
1923 }
1924
1925