1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1998-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File parse.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 05/26/99 stephen Creation.
17 * 02/25/00 weiv Overhaul to write udata
18 * 5/10/01 Ram removed ustdio dependency
19 * 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
20 *******************************************************************************
21 */
22
23 // Safer use of UnicodeString.
24 #ifndef UNISTR_FROM_CHAR_EXPLICIT
25 # define UNISTR_FROM_CHAR_EXPLICIT explicit
26 #endif
27
28 // Less important, but still a good idea.
29 #ifndef UNISTR_FROM_STRING_EXPLICIT
30 # define UNISTR_FROM_STRING_EXPLICIT explicit
31 #endif
32
33 #include <assert.h>
34 #include "parse.h"
35 #include "errmsg.h"
36 #include "uhash.h"
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "uinvchar.h"
40 #include "read.h"
41 #include "ustr.h"
42 #include "reslist.h"
43 #include "rbt_pars.h"
44 #include "genrb.h"
45 #include "unicode/stringpiece.h"
46 #include "unicode/unistr.h"
47 #include "unicode/ustring.h"
48 #include "unicode/uscript.h"
49 #include "unicode/utf16.h"
50 #include "unicode/putil.h"
51 #include "charstr.h"
52 #include "collationbuilder.h"
53 #include "collationdata.h"
54 #include "collationdatareader.h"
55 #include "collationdatawriter.h"
56 #include "collationfastlatinbuilder.h"
57 #include "collationinfo.h"
58 #include "collationroot.h"
59 #include "collationruleparser.h"
60 #include "collationtailoring.h"
61 #include <stdio.h>
62
63 /* Number of tokens to read ahead of the current stream position */
64 #define MAX_LOOKAHEAD 3
65
66 #define CR 0x000D
67 #define LF 0x000A
68 #define SPACE 0x0020
69 #define TAB 0x0009
70 #define ESCAPE 0x005C
71 #define HASH 0x0023
72 #define QUOTE 0x0027
73 #define ZERO 0x0030
74 #define STARTCOMMAND 0x005B
75 #define ENDCOMMAND 0x005D
76 #define OPENSQBRACKET 0x005B
77 #define CLOSESQBRACKET 0x005D
78
79 using icu::CharString;
80 using icu::LocalMemory;
81 using icu::LocalPointer;
82 using icu::LocalUCHARBUFPointer;
83 using icu::StringPiece;
84 using icu::UnicodeString;
85
86 struct Lookahead
87 {
88 enum ETokenType type;
89 struct UString value;
90 struct UString comment;
91 uint32_t line;
92 };
93
94 /* keep in sync with token defines in read.h */
95 const char *tokenNames[TOK_TOKEN_COUNT] =
96 {
97 "string", /* A string token, such as "MonthNames" */
98 "'{'", /* An opening brace character */
99 "'}'", /* A closing brace character */
100 "','", /* A comma */
101 "':'", /* A colon */
102
103 "<end of file>", /* End of the file has been reached successfully */
104 "<end of line>"
105 };
106
107 /* Just to store "TRUE" */
108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
109
110 typedef struct {
111 struct Lookahead lookahead[MAX_LOOKAHEAD + 1];
112 uint32_t lookaheadPosition;
113 UCHARBUF *buffer;
114 struct SRBRoot *bundle;
115 const char *inputdir;
116 uint32_t inputdirLength;
117 const char *outputdir;
118 uint32_t outputdirLength;
119 const char *filename;
120 UBool makeBinaryCollation;
121 UBool omitCollationRules;
122 } ParseState;
123
124 typedef struct SResource *
125 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
126
127 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
128
/* The nature of the lookahead buffer:
   There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
   MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
   When getToken is called, the current pointer is moved to the next slot and the
   old slot is filled with the next token from the reader by calling getNextToken.
   The token values are stored in the slot, which means that token values don't
   survive a call to getToken, i.e.

   UString *value;

   getToken(state, &value, NULL, NULL, status);
   getToken(state, NULL,   NULL, NULL, status);       bad - value is now a different string
*/
142 static void
initLookahead(ParseState * state,UCHARBUF * buf,UErrorCode * status)143 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
144 {
145 static uint32_t initTypeStrings = 0;
146 uint32_t i;
147
148 if (!initTypeStrings)
149 {
150 initTypeStrings = 1;
151 }
152
153 state->lookaheadPosition = 0;
154 state->buffer = buf;
155
156 resetLineNumber();
157
158 for (i = 0; i < MAX_LOOKAHEAD; i++)
159 {
160 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
161 if (U_FAILURE(*status))
162 {
163 return;
164 }
165 }
166
167 *status = U_ZERO_ERROR;
168 }
169
170 static void
cleanupLookahead(ParseState * state)171 cleanupLookahead(ParseState* state)
172 {
173 uint32_t i;
174 for (i = 0; i <= MAX_LOOKAHEAD; i++)
175 {
176 ustr_deinit(&state->lookahead[i].value);
177 ustr_deinit(&state->lookahead[i].comment);
178 }
179
180 }
181
182 static enum ETokenType
getToken(ParseState * state,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)183 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
184 {
185 enum ETokenType result;
186 uint32_t i;
187
188 result = state->lookahead[state->lookaheadPosition].type;
189
190 if (tokenValue != NULL)
191 {
192 *tokenValue = &state->lookahead[state->lookaheadPosition].value;
193 }
194
195 if (linenumber != NULL)
196 {
197 *linenumber = state->lookahead[state->lookaheadPosition].line;
198 }
199
200 if (comment != NULL)
201 {
202 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
203 }
204
205 i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
206 state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
207 ustr_setlen(&state->lookahead[i].comment, 0, status);
208 ustr_setlen(&state->lookahead[i].value, 0, status);
209 state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
210
211 /* printf("getToken, returning %s\n", tokenNames[result]); */
212
213 return result;
214 }
215
216 static enum ETokenType
peekToken(ParseState * state,uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)217 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
218 {
219 uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
220
221 if (U_FAILURE(*status))
222 {
223 return TOK_ERROR;
224 }
225
226 if (lookaheadCount >= MAX_LOOKAHEAD)
227 {
228 *status = U_INTERNAL_PROGRAM_ERROR;
229 return TOK_ERROR;
230 }
231
232 if (tokenValue != NULL)
233 {
234 *tokenValue = &state->lookahead[i].value;
235 }
236
237 if (linenumber != NULL)
238 {
239 *linenumber = state->lookahead[i].line;
240 }
241
242 if(comment != NULL){
243 ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
244 }
245
246 return state->lookahead[i].type;
247 }
248
249 static void
expect(ParseState * state,enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)250 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
251 {
252 uint32_t line;
253
254 enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
255
256 if (linenumber != NULL)
257 {
258 *linenumber = line;
259 }
260
261 if (U_FAILURE(*status))
262 {
263 return;
264 }
265
266 if (token != expectedToken)
267 {
268 *status = U_INVALID_FORMAT_ERROR;
269 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
270 }
271 else
272 {
273 *status = U_ZERO_ERROR;
274 }
275 }
276
getInvariantString(ParseState * state,uint32_t * line,struct UString * comment,UErrorCode * status)277 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
278 {
279 struct UString *tokenValue;
280 char *result;
281 uint32_t count;
282
283 expect(state, TOK_STRING, &tokenValue, comment, line, status);
284
285 if (U_FAILURE(*status))
286 {
287 return NULL;
288 }
289
290 count = u_strlen(tokenValue->fChars);
291 if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
292 *status = U_INVALID_FORMAT_ERROR;
293 error(*line, "invariant characters required for table keys, binary data, etc.");
294 return NULL;
295 }
296
297 result = static_cast<char *>(uprv_malloc(count+1));
298
299 if (result == NULL)
300 {
301 *status = U_MEMORY_ALLOCATION_ERROR;
302 return NULL;
303 }
304
305 u_UCharsToChars(tokenValue->fChars, result, count+1);
306 return result;
307 }
308
309 static struct SResource *
parseUCARules(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)310 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
311 {
312 struct SResource *result = NULL;
313 struct UString *tokenValue;
314 FileStream *file = NULL;
315 char filename[256] = { '\0' };
316 char cs[128] = { '\0' };
317 uint32_t line;
318 UBool quoted = FALSE;
319 UCHARBUF *ucbuf=NULL;
320 UChar32 c = 0;
321 const char* cp = NULL;
322 UChar *pTarget = NULL;
323 UChar *target = NULL;
324 UChar *targetLimit = NULL;
325 int32_t size = 0;
326
327 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
328
329 if(isVerbose()){
330 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
331 }
332
333 if (U_FAILURE(*status))
334 {
335 return NULL;
336 }
337 /* make the filename including the directory */
338 if (state->inputdir != NULL)
339 {
340 uprv_strcat(filename, state->inputdir);
341
342 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
343 {
344 uprv_strcat(filename, U_FILE_SEP_STRING);
345 }
346 }
347
348 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
349
350 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
351
352 if (U_FAILURE(*status))
353 {
354 return NULL;
355 }
356 uprv_strcat(filename, cs);
357
358 if(state->omitCollationRules) {
359 return res_none();
360 }
361
362 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
363
364 if (U_FAILURE(*status)) {
365 error(line, "An error occured while opening the input file %s\n", filename);
366 return NULL;
367 }
368
369 /* We allocate more space than actually required
370 * since the actual size needed for storing UChars
371 * is not known in UTF-8 byte stream
372 */
373 size = ucbuf_size(ucbuf) + 1;
374 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
375 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
376 target = pTarget;
377 targetLimit = pTarget+size;
378
379 /* read the rules into the buffer */
380 while (target < targetLimit)
381 {
382 c = ucbuf_getc(ucbuf, status);
383 if(c == QUOTE) {
384 quoted = (UBool)!quoted;
385 }
386 /* weiv (06/26/2002): adding the following:
387 * - preserving spaces in commands [...]
388 * - # comments until the end of line
389 */
390 if (c == STARTCOMMAND && !quoted)
391 {
392 /* preserve commands
393 * closing bracket will be handled by the
394 * append at the end of the loop
395 */
396 while(c != ENDCOMMAND) {
397 U_APPEND_CHAR32_ONLY(c, target);
398 c = ucbuf_getc(ucbuf, status);
399 }
400 }
401 else if (c == HASH && !quoted) {
402 /* skip comments */
403 while(c != CR && c != LF) {
404 c = ucbuf_getc(ucbuf, status);
405 }
406 continue;
407 }
408 else if (c == ESCAPE)
409 {
410 c = unescape(ucbuf, status);
411
412 if (c == (UChar32)U_ERR)
413 {
414 uprv_free(pTarget);
415 T_FileStream_close(file);
416 return NULL;
417 }
418 }
419 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
420 {
421 /* ignore spaces carriage returns
422 * and line feed unless in the form \uXXXX
423 */
424 continue;
425 }
426
427 /* Append UChar * after dissembling if c > 0xffff*/
428 if (c != (UChar32)U_EOF)
429 {
430 U_APPEND_CHAR32_ONLY(c, target);
431 }
432 else
433 {
434 break;
435 }
436 }
437
438 /* terminate the string */
439 if(target < targetLimit){
440 *target = 0x0000;
441 }
442
443 result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
444
445
446 ucbuf_close(ucbuf);
447 uprv_free(pTarget);
448 T_FileStream_close(file);
449
450 return result;
451 }
452
453 static struct SResource *
parseTransliterator(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)454 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
455 {
456 struct SResource *result = NULL;
457 struct UString *tokenValue;
458 FileStream *file = NULL;
459 char filename[256] = { '\0' };
460 char cs[128] = { '\0' };
461 uint32_t line;
462 UCHARBUF *ucbuf=NULL;
463 const char* cp = NULL;
464 UChar *pTarget = NULL;
465 const UChar *pSource = NULL;
466 int32_t size = 0;
467
468 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
469
470 if(isVerbose()){
471 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
472 }
473
474 if (U_FAILURE(*status))
475 {
476 return NULL;
477 }
478 /* make the filename including the directory */
479 if (state->inputdir != NULL)
480 {
481 uprv_strcat(filename, state->inputdir);
482
483 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
484 {
485 uprv_strcat(filename, U_FILE_SEP_STRING);
486 }
487 }
488
489 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
490
491 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
492
493 if (U_FAILURE(*status))
494 {
495 return NULL;
496 }
497 uprv_strcat(filename, cs);
498
499
500 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
501
502 if (U_FAILURE(*status)) {
503 error(line, "An error occured while opening the input file %s\n", filename);
504 return NULL;
505 }
506
507 /* We allocate more space than actually required
508 * since the actual size needed for storing UChars
509 * is not known in UTF-8 byte stream
510 */
511 pSource = ucbuf_getBuffer(ucbuf, &size, status);
512 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
513 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
514
515 #if !UCONFIG_NO_TRANSLITERATION
516 size = utrans_stripRules(pSource, size, pTarget, status);
517 #else
518 size = 0;
519 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
520 #endif
521 result = string_open(state->bundle, tag, pTarget, size, NULL, status);
522
523 ucbuf_close(ucbuf);
524 uprv_free(pTarget);
525 T_FileStream_close(file);
526
527 return result;
528 }
529 static ArrayResource* dependencyArray = NULL;
530
531 static struct SResource *
parseDependency(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)532 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
533 {
534 struct SResource *result = NULL;
535 struct SResource *elem = NULL;
536 struct UString *tokenValue;
537 uint32_t line;
538 char filename[256] = { '\0' };
539 char cs[128] = { '\0' };
540
541 expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
542
543 if(isVerbose()){
544 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
545 }
546
547 if (U_FAILURE(*status))
548 {
549 return NULL;
550 }
551 /* make the filename including the directory */
552 if (state->outputdir != NULL)
553 {
554 uprv_strcat(filename, state->outputdir);
555
556 if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
557 {
558 uprv_strcat(filename, U_FILE_SEP_STRING);
559 }
560 }
561
562 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
563
564 if (U_FAILURE(*status))
565 {
566 return NULL;
567 }
568 uprv_strcat(filename, cs);
569 if(!T_FileStream_file_exists(filename)){
570 if(isStrict()){
571 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
572 }else{
573 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
574 }
575 }
576 if(dependencyArray==NULL){
577 dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
578 }
579 if(tag!=NULL){
580 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
581 }
582 elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
583
584 dependencyArray->add(elem);
585
586 if (U_FAILURE(*status))
587 {
588 return NULL;
589 }
590 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
591 return result;
592 }
593 static struct SResource *
parseString(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)594 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
595 {
596 struct UString *tokenValue;
597 struct SResource *result = NULL;
598
599 /* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
600 {
601 return parseUCARules(tag, startline, status);
602 }*/
603 if(isVerbose()){
604 printf(" string %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
605 }
606 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
607
608 if (U_SUCCESS(*status))
609 {
610 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611 doesn't survive expect either) */
612
613 result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
614 if(U_SUCCESS(*status) && result) {
615 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
616
617 if (U_FAILURE(*status))
618 {
619 res_close(result);
620 return NULL;
621 }
622 }
623 }
624
625 return result;
626 }
627
628 static struct SResource *
parseAlias(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)629 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
630 {
631 struct UString *tokenValue;
632 struct SResource *result = NULL;
633
634 expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
635
636 if(isVerbose()){
637 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
638 }
639
640 if (U_SUCCESS(*status))
641 {
642 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
643 doesn't survive expect either) */
644
645 result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
646
647 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
648
649 if (U_FAILURE(*status))
650 {
651 res_close(result);
652 return NULL;
653 }
654 }
655
656 return result;
657 }
658
659 #if !UCONFIG_NO_COLLATION
660
661 namespace {
662
resLookup(struct SResource * res,const char * key)663 static struct SResource* resLookup(struct SResource* res, const char* key){
664 if (res == res_none() || !res->isTable()) {
665 return NULL;
666 }
667
668 TableResource *list = static_cast<TableResource *>(res);
669 SResource *current = list->fFirst;
670 while (current != NULL) {
671 if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
672 return current;
673 }
674 current = current->fNext;
675 }
676 return NULL;
677 }
678
679 class GenrbImporter : public icu::CollationRuleParser::Importer {
680 public:
GenrbImporter(const char * in,const char * out)681 GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
682 virtual ~GenrbImporter();
683 virtual void getRules(
684 const char *localeID, const char *collationType,
685 UnicodeString &rules,
686 const char *&errorReason, UErrorCode &errorCode);
687
688 private:
689 const char *inputDir;
690 const char *outputDir;
691 };
692
~GenrbImporter()693 GenrbImporter::~GenrbImporter() {}
694
695 void
getRules(const char * localeID,const char * collationType,UnicodeString & rules,const char * &,UErrorCode & errorCode)696 GenrbImporter::getRules(
697 const char *localeID, const char *collationType,
698 UnicodeString &rules,
699 const char *& /*errorReason*/, UErrorCode &errorCode) {
700 CharString filename(localeID, errorCode);
701 for(int32_t i = 0; i < filename.length(); i++){
702 if(filename[i] == '-'){
703 filename.data()[i] = '_';
704 }
705 }
706 filename.append(".txt", errorCode);
707 if (U_FAILURE(errorCode)) {
708 return;
709 }
710 CharString inputDirBuf;
711 CharString openFileName;
712 if(inputDir == NULL) {
713 const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
714 if (filenameBegin != NULL) {
715 /*
716 * When a filename ../../../data/root.txt is specified,
717 * we presume that the input directory is ../../../data
718 * This is very important when the resource file includes
719 * another file, like UCARules.txt or thaidict.brk.
720 */
721 StringPiece dir = filename.toStringPiece();
722 const char *filenameLimit = filename.data() + filename.length();
723 dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
724 inputDirBuf.append(dir, errorCode);
725 inputDir = inputDirBuf.data();
726 }
727 }else{
728 int32_t dirlen = (int32_t)uprv_strlen(inputDir);
729
730 if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
731 /*
732 * append the input dir to openFileName if the first char in
733 * filename is not file separator char and the last char input directory is not '.'.
734 * This is to support :
735 * genrb -s. /home/icu/data
736 * genrb -s. icu/data
737 * The user cannot mix notations like
738 * genrb -s. /icu/data --- the absolute path specified. -s redundant
739 * user should use
740 * genrb -s. icu/data --- start from CWD and look in icu/data dir
741 */
742 openFileName.append(inputDir, dirlen, errorCode);
743 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
744 openFileName.append(U_FILE_SEP_CHAR, errorCode);
745 }
746 }
747 }
748 openFileName.append(filename, errorCode);
749 if(U_FAILURE(errorCode)) {
750 return;
751 }
752 // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
753 const char* cp = "";
754 LocalUCHARBUFPointer ucbuf(
755 ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
756 if(errorCode == U_FILE_ACCESS_ERROR) {
757 fprintf(stderr, "couldn't open file %s\n", openFileName.data());
758 return;
759 }
760 if (ucbuf.isNull() || U_FAILURE(errorCode)) {
761 fprintf(stderr, "An error occured processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
762 return;
763 }
764
765 /* Parse the data into an SRBRoot */
766 struct SRBRoot *data =
767 parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode);
768 if (U_FAILURE(errorCode)) {
769 return;
770 }
771
772 struct SResource *root = data->fRoot;
773 struct SResource *collations = resLookup(root, "collations");
774 if (collations != NULL) {
775 struct SResource *collation = resLookup(collations, collationType);
776 if (collation != NULL) {
777 struct SResource *sequence = resLookup(collation, "Sequence");
778 if (sequence != NULL && sequence->isString()) {
779 // No string pointer aliasing so that we need not hold onto the resource bundle.
780 StringResource *sr = static_cast<StringResource *>(sequence);
781 rules = sr->fString;
782 }
783 }
784 }
785 }
786
787 // Quick-and-dirty escaping function.
788 // Assumes that we are on an ASCII-based platform.
789 static void
escape(const UChar * s,char * buffer)790 escape(const UChar *s, char *buffer) {
791 int32_t length = u_strlen(s);
792 int32_t i = 0;
793 for (;;) {
794 UChar32 c;
795 U16_NEXT(s, i, length, c);
796 if (c == 0) {
797 *buffer = 0;
798 return;
799 } else if (0x20 <= c && c <= 0x7e) {
800 // printable ASCII
801 *buffer++ = (char)c; // assumes ASCII-based platform
802 } else {
803 buffer += sprintf(buffer, "\\u%04X", (int)c);
804 }
805 }
806 }
807
808 } // namespace
809
810 #endif // !UCONFIG_NO_COLLATION
811
812 static TableResource *
addCollation(ParseState * state,TableResource * result,const char * collationType,uint32_t startline,UErrorCode * status)813 addCollation(ParseState* state, TableResource *result, const char *collationType,
814 uint32_t startline, UErrorCode *status)
815 {
816 // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
817 struct SResource *member = NULL;
818 struct UString *tokenValue;
819 struct UString comment;
820 enum ETokenType token;
821 char subtag[1024];
822 UnicodeString rules;
823 UBool haveRules = FALSE;
824 UVersionInfo version;
825 uint32_t line;
826
827 /* '{' . (name resource)* '}' */
828 version[0]=0; version[1]=0; version[2]=0; version[3]=0;
829
830 for (;;)
831 {
832 ustr_init(&comment);
833 token = getToken(state, &tokenValue, &comment, &line, status);
834
835 if (token == TOK_CLOSE_BRACE)
836 {
837 break;
838 }
839
840 if (token != TOK_STRING)
841 {
842 res_close(result);
843 *status = U_INVALID_FORMAT_ERROR;
844
845 if (token == TOK_EOF)
846 {
847 error(startline, "unterminated table");
848 }
849 else
850 {
851 error(line, "Unexpected token %s", tokenNames[token]);
852 }
853
854 return NULL;
855 }
856
857 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
858
859 if (U_FAILURE(*status))
860 {
861 res_close(result);
862 return NULL;
863 }
864
865 member = parseResource(state, subtag, NULL, status);
866
867 if (U_FAILURE(*status))
868 {
869 res_close(result);
870 return NULL;
871 }
872 if (result == NULL)
873 {
874 // Ignore the parsed resources, continue parsing.
875 }
876 else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
877 {
878 StringResource *sr = static_cast<StringResource *>(member);
879 char ver[40];
880 int32_t length = sr->length();
881
882 if (length >= UPRV_LENGTHOF(ver))
883 {
884 length = UPRV_LENGTHOF(ver) - 1;
885 }
886
887 sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
888 u_versionFromString(version, ver);
889
890 result->add(member, line, *status);
891 member = NULL;
892 }
893 else if(uprv_strcmp(subtag, "%%CollationBin")==0)
894 {
895 /* discard duplicate %%CollationBin if any*/
896 }
897 else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
898 {
899 StringResource *sr = static_cast<StringResource *>(member);
900 rules = sr->fString;
901 haveRules = TRUE;
902 // Defer building the collator until we have seen
903 // all sub-elements of the collation table, including the Version.
904 /* in order to achieve smaller data files, we can direct genrb */
905 /* to omit collation rules */
906 if(!state->omitCollationRules) {
907 result->add(member, line, *status);
908 member = NULL;
909 }
910 }
911 else // Just copy non-special items.
912 {
913 result->add(member, line, *status);
914 member = NULL;
915 }
916 res_close(member); // TODO: use LocalPointer
917 if (U_FAILURE(*status))
918 {
919 res_close(result);
920 return NULL;
921 }
922 }
923
924 if (!haveRules) { return result; }
925
926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
927 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
928 (void)collationType;
929 #else
930 // CLDR ticket #3949, ICU ticket #8082:
931 // Do not build collation binary data for for-import-only "private" collation rule strings.
932 if (uprv_strncmp(collationType, "private-", 8) == 0) {
933 if(isVerbose()) {
934 printf("Not building %s~%s collation binary\n", state->filename, collationType);
935 }
936 return result;
937 }
938
939 if(!state->makeBinaryCollation) {
940 if(isVerbose()) {
941 printf("Not building %s~%s collation binary\n", state->filename, collationType);
942 }
943 return result;
944 }
945 UErrorCode intStatus = U_ZERO_ERROR;
946 UParseError parseError;
947 uprv_memset(&parseError, 0, sizeof(parseError));
948 GenrbImporter importer(state->inputdir, state->outputdir);
949 const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
950 if(U_FAILURE(intStatus)) {
951 error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
952 res_close(result);
953 return NULL; // TODO: use LocalUResourceBundlePointer for result
954 }
955 icu::CollationBuilder builder(base, intStatus);
956 if(uprv_strncmp(collationType, "search", 6) == 0) {
957 builder.disableFastLatin(); // build fast-Latin table unless search collator
958 }
959 LocalPointer<icu::CollationTailoring> t(
960 builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
961 if(U_FAILURE(intStatus)) {
962 const char *reason = builder.getErrorReason();
963 if(reason == NULL) { reason = ""; }
964 error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
965 state->filename, collationType,
966 (long)parseError.offset, u_errorName(intStatus), reason);
967 if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
968 // Print pre- and post-context.
969 char preBuffer[100], postBuffer[100];
970 escape(parseError.preContext, preBuffer);
971 escape(parseError.postContext, postBuffer);
972 error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
973 }
974 if(isStrict() || t.isNull()) {
975 *status = intStatus;
976 res_close(result);
977 return NULL;
978 }
979 }
980 icu::LocalMemory<uint8_t> buffer;
981 int32_t capacity = 100000;
982 uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
983 if(dest == NULL) {
984 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
985 (long)capacity);
986 *status = U_MEMORY_ALLOCATION_ERROR;
987 res_close(result);
988 return NULL;
989 }
990 int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
991 int32_t totalSize = icu::CollationDataWriter::writeTailoring(
992 *t, *t->settings, indexes, dest, capacity, intStatus);
993 if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
994 intStatus = U_ZERO_ERROR;
995 capacity = totalSize;
996 dest = buffer.allocateInsteadAndCopy(capacity);
997 if(dest == NULL) {
998 fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
999 (long)capacity);
1000 *status = U_MEMORY_ALLOCATION_ERROR;
1001 res_close(result);
1002 return NULL;
1003 }
1004 totalSize = icu::CollationDataWriter::writeTailoring(
1005 *t, *t->settings, indexes, dest, capacity, intStatus);
1006 }
1007 if(U_FAILURE(intStatus)) {
1008 fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1009 u_errorName(intStatus));
1010 res_close(result);
1011 return NULL;
1012 }
1013 if(isVerbose()) {
1014 printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1015 icu::CollationInfo::printSizes(totalSize, indexes);
1016 if(t->settings->hasReordering()) {
1017 printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1018 icu::CollationInfo::printReorderRanges(
1019 *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1020 }
1021 }
1022 struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1023 result->add(collationBin, line, *status);
1024 if (U_FAILURE(*status)) {
1025 res_close(result);
1026 return NULL;
1027 }
1028 #endif
1029 return result;
1030 }
1031
1032 static UBool
keepCollationType(const char * type)1033 keepCollationType(const char *type) { // android-changed
1034 // BEGIN android-added
1035 if (uprv_strcmp(type, "big5han") == 0) { return FALSE; }
1036 if (uprv_strcmp(type, "gb2312han") == 0) { return FALSE; }
1037 // END android-added
1038 return TRUE;
1039 }
1040
1041 static struct SResource *
parseCollationElements(ParseState * state,char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)1042 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1043 {
1044 TableResource *result = NULL;
1045 struct SResource *member = NULL;
1046 struct UString *tokenValue;
1047 struct UString comment;
1048 enum ETokenType token;
1049 char subtag[1024], typeKeyword[1024];
1050 uint32_t line;
1051
1052 result = table_open(state->bundle, tag, NULL, status);
1053
1054 if (result == NULL || U_FAILURE(*status))
1055 {
1056 return NULL;
1057 }
1058 if(isVerbose()){
1059 printf(" collation elements %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1060 }
1061 if(!newCollation) {
1062 return addCollation(state, result, "(no type)", startline, status);
1063 }
1064 else {
1065 for(;;) {
1066 ustr_init(&comment);
1067 token = getToken(state, &tokenValue, &comment, &line, status);
1068
1069 if (token == TOK_CLOSE_BRACE)
1070 {
1071 return result;
1072 }
1073
1074 if (token != TOK_STRING)
1075 {
1076 res_close(result);
1077 *status = U_INVALID_FORMAT_ERROR;
1078
1079 if (token == TOK_EOF)
1080 {
1081 error(startline, "unterminated table");
1082 }
1083 else
1084 {
1085 error(line, "Unexpected token %s", tokenNames[token]);
1086 }
1087
1088 return NULL;
1089 }
1090
1091 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1092
1093 if (U_FAILURE(*status))
1094 {
1095 res_close(result);
1096 return NULL;
1097 }
1098
1099 if (uprv_strcmp(subtag, "default") == 0)
1100 {
1101 member = parseResource(state, subtag, NULL, status);
1102
1103 if (U_FAILURE(*status))
1104 {
1105 res_close(result);
1106 return NULL;
1107 }
1108
1109 result->add(member, line, *status);
1110 }
1111 else
1112 {
1113 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1114 /* this probably needs to be refactored or recursively use the parser */
1115 /* first we assume that our collation table won't have the explicit type */
1116 /* then, we cannot handle aliases */
1117 if(token == TOK_OPEN_BRACE) {
1118 token = getToken(state, &tokenValue, &comment, &line, status);
1119 TableResource *collationRes;
1120 if (keepCollationType(subtag)) {
1121 collationRes = table_open(state->bundle, subtag, NULL, status);
1122 } else {
1123 collationRes = NULL;
1124 }
1125 // need to parse the collation data regardless
1126 collationRes = addCollation(state, collationRes, subtag, startline, status);
1127 if (collationRes != NULL) {
1128 result->add(collationRes, startline, *status);
1129 }
1130 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1131 /* we could have a table too */
1132 token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1133 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1134 if(uprv_strcmp(typeKeyword, "alias") == 0) {
1135 member = parseResource(state, subtag, NULL, status);
1136 if (U_FAILURE(*status))
1137 {
1138 res_close(result);
1139 return NULL;
1140 }
1141
1142 result->add(member, line, *status);
1143 } else {
1144 res_close(result);
1145 *status = U_INVALID_FORMAT_ERROR;
1146 return NULL;
1147 }
1148 } else {
1149 res_close(result);
1150 *status = U_INVALID_FORMAT_ERROR;
1151 return NULL;
1152 }
1153 }
1154
1155 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1156
1157 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1158
1159 if (U_FAILURE(*status))
1160 {
1161 res_close(result);
1162 return NULL;
1163 }
1164 }
1165 }
1166 }
1167
1168 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1169 if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1170 static struct SResource *
realParseTable(ParseState * state,TableResource * table,char * tag,uint32_t startline,UErrorCode * status)1171 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
1172 {
1173 struct SResource *member = NULL;
1174 struct UString *tokenValue=NULL;
1175 struct UString comment;
1176 enum ETokenType token;
1177 char subtag[1024];
1178 uint32_t line;
1179 UBool readToken = FALSE;
1180
1181 /* '{' . (name resource)* '}' */
1182
1183 if(isVerbose()){
1184 printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1185 }
1186 for (;;)
1187 {
1188 ustr_init(&comment);
1189 token = getToken(state, &tokenValue, &comment, &line, status);
1190
1191 if (token == TOK_CLOSE_BRACE)
1192 {
1193 if (!readToken) {
1194 warning(startline, "Encountered empty table");
1195 }
1196 return table;
1197 }
1198
1199 if (token != TOK_STRING)
1200 {
1201 *status = U_INVALID_FORMAT_ERROR;
1202
1203 if (token == TOK_EOF)
1204 {
1205 error(startline, "unterminated table");
1206 }
1207 else
1208 {
1209 error(line, "unexpected token %s", tokenNames[token]);
1210 }
1211
1212 return NULL;
1213 }
1214
1215 if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1216 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1217 } else {
1218 *status = U_INVALID_FORMAT_ERROR;
1219 error(line, "invariant characters required for table keys");
1220 return NULL;
1221 }
1222
1223 if (U_FAILURE(*status))
1224 {
1225 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1226 return NULL;
1227 }
1228
1229 member = parseResource(state, subtag, &comment, status);
1230
1231 if (member == NULL || U_FAILURE(*status))
1232 {
1233 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1234 return NULL;
1235 }
1236
1237 table->add(member, line, *status);
1238
1239 if (U_FAILURE(*status))
1240 {
1241 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1242 return NULL;
1243 }
1244 readToken = TRUE;
1245 ustr_deinit(&comment);
1246 }
1247
1248 /* not reached */
1249 /* A compiler warning will appear if all paths don't contain a return statement. */
1250 /* *status = U_INTERNAL_PROGRAM_ERROR;
1251 return NULL;*/
1252 }
1253
1254 static struct SResource *
parseTable(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1255 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1256 {
1257 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1258 {
1259 return parseCollationElements(state, tag, startline, FALSE, status);
1260 }
1261 if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1262 {
1263 return parseCollationElements(state, tag, startline, TRUE, status);
1264 }
1265 if(isVerbose()){
1266 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1267 }
1268
1269 TableResource *result = table_open(state->bundle, tag, comment, status);
1270
1271 if (result == NULL || U_FAILURE(*status))
1272 {
1273 return NULL;
1274 }
1275 return realParseTable(state, result, tag, startline, status);
1276 }
1277
1278 static struct SResource *
parseArray(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1279 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1280 {
1281 struct SResource *member = NULL;
1282 struct UString *tokenValue;
1283 struct UString memberComments;
1284 enum ETokenType token;
1285 UBool readToken = FALSE;
1286
1287 ArrayResource *result = array_open(state->bundle, tag, comment, status);
1288
1289 if (result == NULL || U_FAILURE(*status))
1290 {
1291 return NULL;
1292 }
1293 if(isVerbose()){
1294 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1295 }
1296
1297 ustr_init(&memberComments);
1298
1299 /* '{' . resource [','] '}' */
1300 for (;;)
1301 {
1302 /* reset length */
1303 ustr_setlen(&memberComments, 0, status);
1304
1305 /* check for end of array, but don't consume next token unless it really is the end */
1306 token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1307
1308
1309 if (token == TOK_CLOSE_BRACE)
1310 {
1311 getToken(state, NULL, NULL, NULL, status);
1312 if (!readToken) {
1313 warning(startline, "Encountered empty array");
1314 }
1315 break;
1316 }
1317
1318 if (token == TOK_EOF)
1319 {
1320 res_close(result);
1321 *status = U_INVALID_FORMAT_ERROR;
1322 error(startline, "unterminated array");
1323 return NULL;
1324 }
1325
1326 /* string arrays are a special case */
1327 if (token == TOK_STRING)
1328 {
1329 getToken(state, &tokenValue, &memberComments, NULL, status);
1330 member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1331 }
1332 else
1333 {
1334 member = parseResource(state, NULL, &memberComments, status);
1335 }
1336
1337 if (member == NULL || U_FAILURE(*status))
1338 {
1339 res_close(result);
1340 return NULL;
1341 }
1342
1343 result->add(member);
1344
1345 /* eat optional comma if present */
1346 token = peekToken(state, 0, NULL, NULL, NULL, status);
1347
1348 if (token == TOK_COMMA)
1349 {
1350 getToken(state, NULL, NULL, NULL, status);
1351 }
1352
1353 if (U_FAILURE(*status))
1354 {
1355 res_close(result);
1356 return NULL;
1357 }
1358 readToken = TRUE;
1359 }
1360
1361 ustr_deinit(&memberComments);
1362 return result;
1363 }
1364
1365 static struct SResource *
parseIntVector(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1366 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1367 {
1368 enum ETokenType token;
1369 char *string;
1370 int32_t value;
1371 UBool readToken = FALSE;
1372 char *stopstring;
1373 uint32_t len;
1374 struct UString memberComments;
1375
1376 IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
1377
1378 if (result == NULL || U_FAILURE(*status))
1379 {
1380 return NULL;
1381 }
1382
1383 if(isVerbose()){
1384 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1385 }
1386 ustr_init(&memberComments);
1387 /* '{' . string [','] '}' */
1388 for (;;)
1389 {
1390 ustr_setlen(&memberComments, 0, status);
1391
1392 /* check for end of array, but don't consume next token unless it really is the end */
1393 token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1394
1395 if (token == TOK_CLOSE_BRACE)
1396 {
1397 /* it's the end, consume the close brace */
1398 getToken(state, NULL, NULL, NULL, status);
1399 if (!readToken) {
1400 warning(startline, "Encountered empty int vector");
1401 }
1402 ustr_deinit(&memberComments);
1403 return result;
1404 }
1405
1406 string = getInvariantString(state, NULL, NULL, status);
1407
1408 if (U_FAILURE(*status))
1409 {
1410 res_close(result);
1411 return NULL;
1412 }
1413
1414 /* For handling illegal char in the Intvector */
1415 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1416 len=(uint32_t)(stopstring-string);
1417
1418 if(len==uprv_strlen(string))
1419 {
1420 result->add(value, *status);
1421 uprv_free(string);
1422 token = peekToken(state, 0, NULL, NULL, NULL, status);
1423 }
1424 else
1425 {
1426 uprv_free(string);
1427 *status=U_INVALID_CHAR_FOUND;
1428 }
1429
1430 if (U_FAILURE(*status))
1431 {
1432 res_close(result);
1433 return NULL;
1434 }
1435
1436 /* the comma is optional (even though it is required to prevent the reader from concatenating
1437 consecutive entries) so that a missing comma on the last entry isn't an error */
1438 if (token == TOK_COMMA)
1439 {
1440 getToken(state, NULL, NULL, NULL, status);
1441 }
1442 readToken = TRUE;
1443 }
1444
1445 /* not reached */
1446 /* A compiler warning will appear if all paths don't contain a return statement. */
1447 /* intvector_close(result, status);
1448 *status = U_INTERNAL_PROGRAM_ERROR;
1449 return NULL;*/
1450 }
1451
1452 static struct SResource *
parseBinary(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1453 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1454 {
1455 uint32_t line;
1456 LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
1457 if (string.isNull() || U_FAILURE(*status))
1458 {
1459 return NULL;
1460 }
1461
1462 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1463 if (U_FAILURE(*status))
1464 {
1465 return NULL;
1466 }
1467
1468 if(isVerbose()){
1469 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1470 }
1471
1472 uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
1473 if (count > 0){
1474 if((count % 2)==0){
1475 LocalMemory<uint8_t> value;
1476 if (value.allocateInsteadAndCopy(count) == NULL)
1477 {
1478 *status = U_MEMORY_ALLOCATION_ERROR;
1479 return NULL;
1480 }
1481
1482 char toConv[3] = {'\0', '\0', '\0'};
1483 for (uint32_t i = 0; i < count; i += 2)
1484 {
1485 toConv[0] = string[i];
1486 toConv[1] = string[i + 1];
1487
1488 char *stopstring;
1489 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1490 uint32_t len=(uint32_t)(stopstring-toConv);
1491
1492 if(len!=2)
1493 {
1494 *status=U_INVALID_CHAR_FOUND;
1495 return NULL;
1496 }
1497 }
1498
1499 return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
1500 }
1501 else
1502 {
1503 *status = U_INVALID_CHAR_FOUND;
1504 error(line, "Encountered invalid binary value (length is odd)");
1505 return NULL;
1506 }
1507 }
1508 else
1509 {
1510 warning(startline, "Encountered empty binary value");
1511 return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
1512 }
1513 }
1514
1515 static struct SResource *
parseInteger(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1516 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1517 {
1518 struct SResource *result = NULL;
1519 int32_t value;
1520 char *string;
1521 char *stopstring;
1522 uint32_t len;
1523
1524 string = getInvariantString(state, NULL, NULL, status);
1525
1526 if (string == NULL || U_FAILURE(*status))
1527 {
1528 return NULL;
1529 }
1530
1531 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1532
1533 if (U_FAILURE(*status))
1534 {
1535 uprv_free(string);
1536 return NULL;
1537 }
1538
1539 if(isVerbose()){
1540 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1541 }
1542
1543 if (uprv_strlen(string) <= 0)
1544 {
1545 warning(startline, "Encountered empty integer. Default value is 0.");
1546 }
1547
1548 /* Allow integer support for hexdecimal, octal digit and decimal*/
1549 /* and handle illegal char in the integer*/
1550 value = uprv_strtoul(string, &stopstring, 0);
1551 len=(uint32_t)(stopstring-string);
1552 if(len==uprv_strlen(string))
1553 {
1554 result = int_open(state->bundle, tag, value, comment, status);
1555 }
1556 else
1557 {
1558 *status=U_INVALID_CHAR_FOUND;
1559 }
1560 uprv_free(string);
1561
1562 return result;
1563 }
1564
1565 static struct SResource *
parseImport(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1566 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1567 {
1568 uint32_t line;
1569 LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
1570 if (U_FAILURE(*status))
1571 {
1572 return NULL;
1573 }
1574
1575 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1576
1577 if (U_FAILURE(*status))
1578 {
1579 return NULL;
1580 }
1581
1582 if(isVerbose()){
1583 printf(" import %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1584 }
1585
1586 /* Open the input file for reading */
1587 CharString fullname;
1588 if (state->inputdir != NULL) {
1589 fullname.append(state->inputdir, *status);
1590 }
1591 fullname.appendPathPart(filename.getAlias(), *status);
1592 if (U_FAILURE(*status)) {
1593 return NULL;
1594 }
1595
1596 FileStream *file = T_FileStream_open(fullname.data(), "rb");
1597 if (file == NULL)
1598 {
1599 error(line, "couldn't open input file %s", filename.getAlias());
1600 *status = U_FILE_ACCESS_ERROR;
1601 return NULL;
1602 }
1603
1604 int32_t len = T_FileStream_size(file);
1605 LocalMemory<uint8_t> data;
1606 if(data.allocateInsteadAndCopy(len) == NULL)
1607 {
1608 *status = U_MEMORY_ALLOCATION_ERROR;
1609 T_FileStream_close (file);
1610 return NULL;
1611 }
1612
1613 /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
1614 T_FileStream_close (file);
1615
1616 return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
1617 }
1618
1619 static struct SResource *
parseInclude(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1620 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1621 {
1622 struct SResource *result;
1623 int32_t len=0;
1624 char *filename;
1625 uint32_t line;
1626 UChar *pTarget = NULL;
1627
1628 UCHARBUF *ucbuf;
1629 char *fullname = NULL;
1630 int32_t count = 0;
1631 const char* cp = NULL;
1632 const UChar* uBuffer = NULL;
1633
1634 filename = getInvariantString(state, &line, NULL, status);
1635 count = (int32_t)uprv_strlen(filename);
1636
1637 if (U_FAILURE(*status))
1638 {
1639 return NULL;
1640 }
1641
1642 expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1643
1644 if (U_FAILURE(*status))
1645 {
1646 uprv_free(filename);
1647 return NULL;
1648 }
1649
1650 if(isVerbose()){
1651 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1652 }
1653
1654 fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
1655 /* test for NULL */
1656 if(fullname == NULL)
1657 {
1658 *status = U_MEMORY_ALLOCATION_ERROR;
1659 uprv_free(filename);
1660 return NULL;
1661 }
1662
1663 if(state->inputdir!=NULL){
1664 if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1665 {
1666
1667 uprv_strcpy(fullname, state->inputdir);
1668
1669 fullname[state->inputdirLength] = U_FILE_SEP_CHAR;
1670 fullname[state->inputdirLength + 1] = '\0';
1671
1672 uprv_strcat(fullname, filename);
1673 }
1674 else
1675 {
1676 uprv_strcpy(fullname, state->inputdir);
1677 uprv_strcat(fullname, filename);
1678 }
1679 }else{
1680 uprv_strcpy(fullname,filename);
1681 }
1682
1683 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1684
1685 if (U_FAILURE(*status)) {
1686 error(line, "couldn't open input file %s\n", filename);
1687 return NULL;
1688 }
1689
1690 uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1691 result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1692
1693 ucbuf_close(ucbuf);
1694
1695 uprv_free(pTarget);
1696
1697 uprv_free(filename);
1698 uprv_free(fullname);
1699
1700 return result;
1701 }
1702
1703
1704
1705
1706
/*
 * UChar string constants for the resource type keywords that may follow a
 * ':' in a resource definition. Each entry gives the variable name, its text
 * and the text length. The constants are initialised in initParser().
 * k_type_bin and k_type_int are short aliases that parseResourceType() maps
 * to the binary and integer types.
 */
U_STRING_DECL(k_type_string, "string", 6);
U_STRING_DECL(k_type_binary, "binary", 6);
U_STRING_DECL(k_type_bin, "bin", 3);
U_STRING_DECL(k_type_table, "table", 5);
U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
U_STRING_DECL(k_type_int, "int", 3);
U_STRING_DECL(k_type_integer, "integer", 7);
U_STRING_DECL(k_type_array, "array", 5);
U_STRING_DECL(k_type_alias, "alias", 5);
U_STRING_DECL(k_type_intvector, "intvector", 9);
U_STRING_DECL(k_type_import, "import", 6);
U_STRING_DECL(k_type_include, "include", 7);

/* Various non-standard processing plugins that create one or more special resources. */
U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
1725
/*
 * Resource types known to the parser.
 *
 * The values are used as indexes into gResourceTypes, so the order of the
 * enumerators must stay in step with the order of that table's rows.
 * RESTYPE_UNKNOWN means "no explicit type given"; RESTYPE_RESERVED marks the
 * end of the range and is what the type lookup yields for an unknown name.
 */
typedef enum EResourceType
{
    RESTYPE_UNKNOWN,
    RESTYPE_STRING,
    RESTYPE_BINARY,
    RESTYPE_TABLE,
    RESTYPE_TABLE_NO_FALLBACK,
    RESTYPE_INTEGER,
    RESTYPE_ARRAY,
    RESTYPE_ALIAS,
    RESTYPE_INTVECTOR,
    RESTYPE_IMPORT,
    RESTYPE_INCLUDE,
    RESTYPE_PROCESS_UCA_RULES,
    RESTYPE_PROCESS_COLLATION,
    RESTYPE_PROCESS_TRANSLITERATOR,
    RESTYPE_PROCESS_DEPENDENCY,
    RESTYPE_RESERVED
} EResourceType;
1745
/*
 * Per-type dispatch table, indexed by EResourceType (one row per enumerator,
 * in the same order).
 *
 * nameUChars is the keyword compared against the type token in
 * parseResourceType(); parseFunction is the parser that parseResource()
 * calls for that type. A NULL parseFunction makes parseResource() report an
 * internal error for that type.
 */
static struct {
    const char *nameChars;   /* only used for debugging */
    const UChar *nameUChars;
    ParseResourceFunction *parseFunction;
} gResourceTypes[] = {
    {"Unknown", NULL, NULL},
    {"string", k_type_string, parseString},
    {"binary", k_type_binary, parseBinary},
    {"table", k_type_table, parseTable},
    {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
    {"integer", k_type_integer, parseInteger},
    {"array", k_type_array, parseArray},
    {"alias", k_type_alias, parseAlias},
    {"intvector", k_type_intvector, parseIntVector},
    {"import", k_type_import, parseImport},
    {"include", k_type_include, parseInclude},
    {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
    {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
    {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
    {"process(dependency)", k_type_plugin_dependency, parseDependency},
    {"reserved", NULL, NULL}
};
1768
/*
 * Initialises the UChar type-keyword constants (k_type_*) used when
 * resolving explicit resource types. Each U_STRING_INIT repeats the text and
 * length given in the matching U_STRING_DECL.
 * NOTE(review): presumably this must run before the first call to parse();
 * confirm against the caller.
 */
void initParser()
{
    U_STRING_INIT(k_type_string, "string", 6);
    U_STRING_INIT(k_type_binary, "binary", 6);
    U_STRING_INIT(k_type_bin, "bin", 3);
    U_STRING_INIT(k_type_table, "table", 5);
    U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
    U_STRING_INIT(k_type_int, "int", 3);
    U_STRING_INIT(k_type_integer, "integer", 7);
    U_STRING_INIT(k_type_array, "array", 5);
    U_STRING_INIT(k_type_alias, "alias", 5);
    U_STRING_INIT(k_type_intvector, "intvector", 9);
    U_STRING_INIT(k_type_import, "import", 6);
    U_STRING_INIT(k_type_include, "include", 7);

    U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
    U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
    U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
    U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
}
1789
isTable(enum EResourceType type)1790 static inline UBool isTable(enum EResourceType type) {
1791 return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1792 }
1793
1794 static enum EResourceType
parseResourceType(ParseState * state,UErrorCode * status)1795 parseResourceType(ParseState* state, UErrorCode *status)
1796 {
1797 struct UString *tokenValue;
1798 struct UString comment;
1799 enum EResourceType result = RESTYPE_UNKNOWN;
1800 uint32_t line=0;
1801 ustr_init(&comment);
1802 expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1803
1804 if (U_FAILURE(*status))
1805 {
1806 return RESTYPE_UNKNOWN;
1807 }
1808
1809 *status = U_ZERO_ERROR;
1810
1811 /* Search for normal types */
1812 result=RESTYPE_UNKNOWN;
1813 while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1814 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1815 break;
1816 }
1817 }
1818 /* Now search for the aliases */
1819 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1820 result = RESTYPE_INTEGER;
1821 }
1822 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1823 result = RESTYPE_BINARY;
1824 }
1825 else if (result == RESTYPE_RESERVED) {
1826 char tokenBuffer[1024];
1827 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1828 tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1829 *status = U_INVALID_FORMAT_ERROR;
1830 error(line, "unknown resource type '%s'", tokenBuffer);
1831 }
1832
1833 return result;
1834 }
1835
1836 /* parse a non-top-level resource */
1837 static struct SResource *
parseResource(ParseState * state,char * tag,const struct UString * comment,UErrorCode * status)1838 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1839 {
1840 enum ETokenType token;
1841 enum EResourceType resType = RESTYPE_UNKNOWN;
1842 ParseResourceFunction *parseFunction = NULL;
1843 struct UString *tokenValue;
1844 uint32_t startline;
1845 uint32_t line;
1846
1847
1848 token = getToken(state, &tokenValue, NULL, &startline, status);
1849
1850 if(isVerbose()){
1851 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1852 }
1853
1854 /* name . [ ':' type ] '{' resource '}' */
1855 /* This function parses from the colon onwards. If the colon is present, parse the
1856 type then try to parse a resource of that type. If there is no explicit type,
1857 work it out using the lookahead tokens. */
1858 switch (token)
1859 {
1860 case TOK_EOF:
1861 *status = U_INVALID_FORMAT_ERROR;
1862 error(startline, "Unexpected EOF encountered");
1863 return NULL;
1864
1865 case TOK_ERROR:
1866 *status = U_INVALID_FORMAT_ERROR;
1867 return NULL;
1868
1869 case TOK_COLON:
1870 resType = parseResourceType(state, status);
1871 expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1872
1873 if (U_FAILURE(*status))
1874 {
1875 return NULL;
1876 }
1877
1878 break;
1879
1880 case TOK_OPEN_BRACE:
1881 break;
1882
1883 default:
1884 *status = U_INVALID_FORMAT_ERROR;
1885 error(startline, "syntax error while reading a resource, expected '{' or ':'");
1886 return NULL;
1887 }
1888
1889
1890 if (resType == RESTYPE_UNKNOWN)
1891 {
1892 /* No explicit type, so try to work it out. At this point, we've read the first '{'.
1893 We could have any of the following:
1894 { { => array (nested)
1895 { :/} => array
1896 { string , => string array
1897
1898 { string { => table
1899
1900 { string :/{ => table
1901 { string } => string
1902 */
1903
1904 token = peekToken(state, 0, NULL, &line, NULL,status);
1905
1906 if (U_FAILURE(*status))
1907 {
1908 return NULL;
1909 }
1910
1911 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1912 {
1913 resType = RESTYPE_ARRAY;
1914 }
1915 else if (token == TOK_STRING)
1916 {
1917 token = peekToken(state, 1, NULL, &line, NULL, status);
1918
1919 if (U_FAILURE(*status))
1920 {
1921 return NULL;
1922 }
1923
1924 switch (token)
1925 {
1926 case TOK_COMMA: resType = RESTYPE_ARRAY; break;
1927 case TOK_OPEN_BRACE: resType = RESTYPE_TABLE; break;
1928 case TOK_CLOSE_BRACE: resType = RESTYPE_STRING; break;
1929 case TOK_COLON: resType = RESTYPE_TABLE; break;
1930 default:
1931 *status = U_INVALID_FORMAT_ERROR;
1932 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1933 return NULL;
1934 }
1935 }
1936 else
1937 {
1938 *status = U_INVALID_FORMAT_ERROR;
1939 error(line, "Unexpected token after '{'");
1940 return NULL;
1941 }
1942
1943 /* printf("Type guessed as %s\n", resourceNames[resType]); */
1944 } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
1945 *status = U_INVALID_FORMAT_ERROR;
1946 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1947 return NULL;
1948 }
1949
1950
1951 /* We should now know what we need to parse next, so call the appropriate parser
1952 function and return. */
1953 parseFunction = gResourceTypes[resType].parseFunction;
1954 if (parseFunction != NULL) {
1955 return parseFunction(state, tag, startline, comment, status);
1956 }
1957 else {
1958 *status = U_INTERNAL_PROGRAM_ERROR;
1959 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1960 }
1961
1962 return NULL;
1963 }
1964
1965 /* parse the top-level resource */
1966 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,const char * filename,UBool makeBinaryCollation,UBool omitCollationRules,UErrorCode * status)1967 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
1968 UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
1969 {
1970 struct UString *tokenValue;
1971 struct UString comment;
1972 uint32_t line;
1973 enum EResourceType bundleType;
1974 enum ETokenType token;
1975 ParseState state;
1976 uint32_t i;
1977
1978
1979 for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1980 {
1981 ustr_init(&state.lookahead[i].value);
1982 ustr_init(&state.lookahead[i].comment);
1983 }
1984
1985 initLookahead(&state, buf, status);
1986
1987 state.inputdir = inputDir;
1988 state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
1989 state.outputdir = outputDir;
1990 state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
1991 state.filename = filename;
1992 state.makeBinaryCollation = makeBinaryCollation;
1993 state.omitCollationRules = omitCollationRules;
1994
1995 ustr_init(&comment);
1996 expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
1997
1998 state.bundle = new SRBRoot(&comment, FALSE, *status);
1999
2000 if (state.bundle == NULL || U_FAILURE(*status))
2001 {
2002 return NULL;
2003 }
2004
2005
2006 state.bundle->setLocale(tokenValue->fChars, *status);
2007
2008 /* The following code is to make Empty bundle work no matter with :table specifer or not */
2009 token = getToken(&state, NULL, NULL, &line, status);
2010 if(token==TOK_COLON) {
2011 *status=U_ZERO_ERROR;
2012 bundleType=parseResourceType(&state, status);
2013
2014 if(isTable(bundleType))
2015 {
2016 expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2017 }
2018 else
2019 {
2020 *status=U_PARSE_ERROR;
2021 error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2022 }
2023 }
2024 else
2025 {
2026 /* not a colon */
2027 if(token==TOK_OPEN_BRACE)
2028 {
2029 *status=U_ZERO_ERROR;
2030 bundleType=RESTYPE_TABLE;
2031 }
2032 else
2033 {
2034 /* neither colon nor open brace */
2035 *status=U_PARSE_ERROR;
2036 bundleType=RESTYPE_UNKNOWN;
2037 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2038 }
2039 }
2040
2041 if (U_FAILURE(*status))
2042 {
2043 delete state.bundle;
2044 return NULL;
2045 }
2046
2047 if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2048 /*
2049 * Parse a top-level table with the table(nofallback) declaration.
2050 * This is the same as a regular table, but also sets the
2051 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2052 */
2053 state.bundle->fNoFallback=TRUE;
2054 }
2055 /* top-level tables need not handle special table names like "collations" */
2056 assert(!state.bundle->fIsPoolBundle);
2057 assert(state.bundle->fRoot->fType == URES_TABLE);
2058 TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
2059 realParseTable(&state, rootTable, NULL, line, status);
2060 if(dependencyArray!=NULL){
2061 rootTable->add(dependencyArray, 0, *status);
2062 dependencyArray = NULL;
2063 }
2064 if (U_FAILURE(*status))
2065 {
2066 delete state.bundle;
2067 res_close(dependencyArray);
2068 return NULL;
2069 }
2070
2071 if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2072 {
2073 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2074 if(isStrict()){
2075 *status = U_INVALID_FORMAT_ERROR;
2076 return NULL;
2077 }
2078 }
2079
2080 cleanupLookahead(&state);
2081 ustr_deinit(&comment);
2082 return state.bundle;
2083 }
2084