1 /*
2 *******************************************************************************
3 * Copyright (C) 2011-2012, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_FORMATTING
11
12 #include "unicode/calendar.h"
13 #include "unicode/tzfmt.h"
14 #include "unicode/numsys.h"
15 #include "unicode/uchar.h"
16 #include "unicode/udat.h"
17 #include "tzgnames.h"
18 #include "cmemory.h"
19 #include "cstring.h"
20 #include "putilimp.h"
21 #include "uassert.h"
22 #include "ucln_in.h"
23 #include "umutex.h"
24 #include "uresimp.h"
25 #include "ureslocs.h"
26 #include "uvector.h"
27 #include "zonemeta.h"
28
29 U_NAMESPACE_BEGIN
30
31 static const char gZoneStringsTag[] = "zoneStrings";
32 static const char gGmtFormatTag[]= "gmtFormat";
33 static const char gGmtZeroFormatTag[] = "gmtZeroFormat";
34 static const char gHourFormatTag[]= "hourFormat";
35
36 static const UChar TZID_GMT[] = {0x0045, 0x0074, 0x0063, 0x002F, 0x0047, 0x004D, 0x0054, 0}; // Etc/GMT
37
38 static const UChar DEFAULT_GMT_PATTERN[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0}; // GMT{0}
39 //static const UChar DEFAULT_GMT_ZERO[] = {0x0047, 0x004D, 0x0054, 0}; // GMT
40 static const UChar DEFAULT_GMT_POSITIVE_HM[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0}; // +HH:mm
41 static const UChar DEFAULT_GMT_POSITIVE_HMS[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0}; // +HH:mm:ss
42 static const UChar DEFAULT_GMT_NEGATIVE_HM[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0}; // -HH:mm
43 static const UChar DEFAULT_GMT_NEGATIVE_HMS[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0}; // -HH:mm:ss
44
45 static const UChar32 DEFAULT_GMT_DIGITS[] = {
46 0x0030, 0x0031, 0x0032, 0x0033, 0x0034,
47 0x0035, 0x0036, 0x0037, 0x0038, 0x0039
48 };
49
50 static const UChar DEFAULT_GMT_OFFSET_SEP = 0x003A; // ':'
51
52 static const UChar ARG0[] = {0x007B, 0x0030, 0x007D}; // "{0}"
53 static const int ARG0_LEN = 3;
54
55 static const UChar DEFAULT_GMT_OFFSET_MINUTE_PATTERN[] = {0x006D, 0x006D, 0}; // "mm"
56 static const UChar DEFAULT_GMT_OFFSET_SECOND_PATTERN[] = {0x0073, 0x0073, 0}; // "ss"
57
58 static const UChar ALT_GMT_STRINGS[][4] = {
59 {0x0047, 0x004D, 0x0054, 0}, // GMT
60 {0x0055, 0x0054, 0x0043, 0}, // UTC
61 {0x0055, 0x0054, 0, 0}, // UT
62 {0, 0, 0, 0}
63 };
64
65 // Order of GMT offset pattern parsing, *_HMS must be evaluated first
66 // because *_HM is most likely a substring of *_HMS
67 static const int32_t PARSE_GMT_OFFSET_TYPES[] = {
68 UTZFMT_PAT_POSITIVE_HMS,
69 UTZFMT_PAT_NEGATIVE_HMS,
70 UTZFMT_PAT_POSITIVE_HM,
71 UTZFMT_PAT_NEGATIVE_HM,
72 -1
73 };
74
75 static const UChar SINGLEQUOTE = 0x0027;
76 static const UChar PLUS = 0x002B;
77 static const UChar MINUS = 0x002D;
78 static const UChar ISO8601_UTC = 0x005A; // 'Z'
79 static const UChar ISO8601_SEP = 0x003A; // ':'
80
81 static const int32_t MILLIS_PER_HOUR = 60 * 60 * 1000;
82 static const int32_t MILLIS_PER_MINUTE = 60 * 1000;
83 static const int32_t MILLIS_PER_SECOND = 1000;
84
85 // Maximum offset (exclusive) in millisecond supported by offset formats
86 static int32_t MAX_OFFSET = 24 * MILLIS_PER_HOUR;
87
88 // Maximum values for GMT offset fields
89 static const int32_t MAX_OFFSET_HOUR = 23;
90 static const int32_t MAX_OFFSET_MINUTE = 59;
91 static const int32_t MAX_OFFSET_SECOND = 59;
92
93 static const int32_t UNKNOWN_OFFSET = 0x7FFFFFFF;
94
95 static const int32_t ALL_SPECIFIC_NAME_TYPES = UTZNM_LONG_STANDARD | UTZNM_LONG_DAYLIGHT | UTZNM_SHORT_STANDARD | UTZNM_SHORT_DAYLIGHT;
96 static const int32_t ALL_GENERIC_NAME_TYPES = UTZGNM_LOCATION | UTZGNM_LONG | UTZGNM_SHORT;
97
98 #define STYLE_FLAG(c) (1 << (c))
99 #define DIGIT_VAL(c) (0x0030 <= (c) && (c) <= 0x0039 ? (c) - 0x0030 : -1)
100 #define MAX_OFFSET_DIGITS 6
101
102
103 // ------------------------------------------------------------------
104 // GMTOffsetField
105 //
106 // This class represents a localized GMT offset pattern
107 // item and used by TimeZoneFormat
108 // ------------------------------------------------------------------
109 class GMTOffsetField : public UMemory {
110 public:
111 enum FieldType {
112 TEXT = 0,
113 HOUR = 1,
114 MINUTE = 2,
115 SECOND = 4
116 };
117
118 virtual ~GMTOffsetField();
119
120 static GMTOffsetField* createText(const UnicodeString& text, UErrorCode& status);
121 static GMTOffsetField* createTimeField(FieldType type, uint8_t width, UErrorCode& status);
122 static UBool isValid(FieldType type, int32_t width);
123 static FieldType getTypeByLetter(UChar ch);
124
125 FieldType getType() const;
126 uint8_t getWidth() const;
127 const UChar* getPatternText(void) const;
128
129 private:
130 UChar* fText;
131 FieldType fType;
132 uint8_t fWidth;
133
134 GMTOffsetField();
135 };
136
GMTOffsetField()137 GMTOffsetField::GMTOffsetField()
138 : fText(NULL), fType(TEXT), fWidth(0) {
139 }
140
~GMTOffsetField()141 GMTOffsetField::~GMTOffsetField() {
142 if (fText) {
143 uprv_free(fText);
144 }
145 }
146
147 GMTOffsetField*
createText(const UnicodeString & text,UErrorCode & status)148 GMTOffsetField::createText(const UnicodeString& text, UErrorCode& status) {
149 if (U_FAILURE(status)) {
150 return NULL;
151 }
152 GMTOffsetField* result = new GMTOffsetField();
153 if (result == NULL) {
154 status = U_MEMORY_ALLOCATION_ERROR;
155 return NULL;
156 }
157
158 int32_t len = text.length();
159 result->fText = (UChar*)uprv_malloc((len + 1) * sizeof(UChar));
160 if (result->fText == NULL) {
161 status = U_MEMORY_ALLOCATION_ERROR;
162 delete result;
163 return NULL;
164 }
165 u_strncpy(result->fText, text.getBuffer(), len);
166 result->fText[len] = 0;
167 result->fType = TEXT;
168
169 return result;
170 }
171
172 GMTOffsetField*
createTimeField(FieldType type,uint8_t width,UErrorCode & status)173 GMTOffsetField::createTimeField(FieldType type, uint8_t width, UErrorCode& status) {
174 U_ASSERT(type != TEXT);
175 if (U_FAILURE(status)) {
176 return NULL;
177 }
178 GMTOffsetField* result = new GMTOffsetField();
179 if (result == NULL) {
180 status = U_MEMORY_ALLOCATION_ERROR;
181 return NULL;
182 }
183
184 result->fType = type;
185 result->fWidth = width;
186
187 return result;
188 }
189
190 UBool
isValid(FieldType type,int32_t width)191 GMTOffsetField::isValid(FieldType type, int32_t width) {
192 switch (type) {
193 case HOUR:
194 return (width == 1 || width == 2);
195 case MINUTE:
196 case SECOND:
197 return (width == 2);
198 default:
199 U_ASSERT(FALSE);
200 }
201 return (width > 0);
202 }
203
204 GMTOffsetField::FieldType
getTypeByLetter(UChar ch)205 GMTOffsetField::getTypeByLetter(UChar ch) {
206 if (ch == 0x0048 /* H */) {
207 return HOUR;
208 } else if (ch == 0x006D /* m */) {
209 return MINUTE;
210 } else if (ch == 0x0073 /* s */) {
211 return SECOND;
212 }
213 return TEXT;
214 }
215
216 inline GMTOffsetField::FieldType
getType() const217 GMTOffsetField::getType() const {
218 return fType;
219 }
220
221 inline uint8_t
getWidth() const222 GMTOffsetField::getWidth() const {
223 return fWidth;
224 }
225
226 inline const UChar*
getPatternText(void) const227 GMTOffsetField::getPatternText(void) const {
228 return fText;
229 }
230
231
232 U_CDECL_BEGIN
233 static void U_CALLCONV
deleteGMTOffsetField(void * obj)234 deleteGMTOffsetField(void *obj) {
235 delete static_cast<GMTOffsetField *>(obj);
236 }
237 U_CDECL_END
238
239
240 // ------------------------------------------------------------------
241 // TimeZoneFormat
242 // ------------------------------------------------------------------
// Emits the ICU class-ID (RTTI) boilerplate for TimeZoneFormat.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TimeZoneFormat)
244
245 TimeZoneFormat::TimeZoneFormat(const Locale& locale, UErrorCode& status)
246 : fLocale(locale), fTimeZoneNames(NULL), fTimeZoneGenericNames(NULL), fDefParseOptionFlags(0) {
247
248 for (int32_t i = 0; i <= UTZFMT_PAT_NEGATIVE_HMS; i++) {
249 fGMTOffsetPatternItems[i] = NULL;
250 }
251
252 const char* region = fLocale.getCountry();
253 int32_t regionLen = uprv_strlen(region);
254 if (regionLen == 0) {
255 char loc[ULOC_FULLNAME_CAPACITY];
256 uloc_addLikelySubtags(fLocale.getName(), loc, sizeof(loc), &status);
257
258 regionLen = uloc_getCountry(loc, fTargetRegion, sizeof(fTargetRegion), &status);
259 if (U_SUCCESS(status)) {
260 fTargetRegion[regionLen] = 0;
261 } else {
262 return;
263 }
264 } else if (regionLen < (int32_t)sizeof(fTargetRegion)) {
265 uprv_strcpy(fTargetRegion, region);
266 } else {
267 fTargetRegion[0] = 0;
268 }
269
270 fTimeZoneNames = TimeZoneNames::createInstance(locale, status);
271 // fTimeZoneGenericNames is lazily instantiated
272
273 const UChar* gmtPattern = NULL;
274 const UChar* hourFormats = NULL;
275
276 UResourceBundle *zoneBundle = ures_open(U_ICUDATA_ZONE, locale.getName(), &status);
277 UResourceBundle *zoneStringsArray = ures_getByKeyWithFallback(zoneBundle, gZoneStringsTag, NULL, &status);
278 if (U_SUCCESS(status)) {
279 const UChar* resStr;
280 int32_t len;
281 resStr = ures_getStringByKeyWithFallback(zoneStringsArray, gGmtFormatTag, &len, &status);
282 if (len > 0) {
283 gmtPattern = resStr;
284 }
285 resStr = ures_getStringByKeyWithFallback(zoneStringsArray, gGmtZeroFormatTag, &len, &status);
286 if (len > 0) {
287 fGMTZeroFormat.setTo(TRUE, resStr, len);
288 }
289 resStr = ures_getStringByKeyWithFallback(zoneStringsArray, gHourFormatTag, &len, &status);
290 if (len > 0) {
291 hourFormats = resStr;
292 }
293 ures_close(zoneStringsArray);
294 ures_close(zoneBundle);
295 }
296
297 if (gmtPattern == NULL) {
298 gmtPattern = DEFAULT_GMT_PATTERN;
299 }
300 initGMTPattern(UnicodeString(gmtPattern, -1), status);
301
302 UBool useDefHourFmt = TRUE;
303 if (hourFormats) {
304 UChar *sep = u_strchr(hourFormats, (UChar)0x003B /* ';' */);
305 if (sep != NULL) {
306 fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HM].setTo(FALSE, hourFormats, (int32_t)(sep - hourFormats));
307 fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HM].setTo(TRUE, sep + 1, -1);
308 expandOffsetPattern(fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HM], fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HMS]);
309 expandOffsetPattern(fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HM], fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HMS]);
310 useDefHourFmt = FALSE;
311 }
312 }
313 if (useDefHourFmt) {
314 fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HM].setTo(TRUE, DEFAULT_GMT_POSITIVE_HM, -1);
315 fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HMS].setTo(TRUE, DEFAULT_GMT_POSITIVE_HMS, -1);
316 fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HM].setTo(TRUE, DEFAULT_GMT_NEGATIVE_HM, -1);
317 fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HMS].setTo(TRUE, DEFAULT_GMT_NEGATIVE_HMS, -1);
318 }
319 initGMTOffsetPatterns(status);
320
321 NumberingSystem* ns = NumberingSystem::createInstance(locale, status);
322 UBool useDefDigits = TRUE;
323 if (ns && !ns->isAlgorithmic()) {
324 UnicodeString digits = ns->getDescription();
325 useDefDigits = !toCodePoints(digits, fGMTOffsetDigits, 10);
326 }
327 if (useDefDigits) {
328 uprv_memcpy(fGMTOffsetDigits, DEFAULT_GMT_DIGITS, sizeof(UChar32) * 10);
329 }
330 delete ns;
331 }
332
// Copy constructor: puts every owned pointer into a known (NULL) state
// and then delegates the actual copying to the assignment operator.
TimeZoneFormat::TimeZoneFormat(const TimeZoneFormat& other)
: Format(other), fTimeZoneNames(NULL), fTimeZoneGenericNames(NULL) {

    int32_t idx = 0;
    while (idx <= UTZFMT_PAT_NEGATIVE_HMS) {
        fGMTOffsetPatternItems[idx] = NULL;
        ++idx;
    }
    *this = other;
}
341
342
~TimeZoneFormat()343 TimeZoneFormat::~TimeZoneFormat() {
344 delete fTimeZoneNames;
345 delete fTimeZoneGenericNames;
346 for (int32_t i = 0; i <= UTZFMT_PAT_NEGATIVE_HMS; i++) {
347 delete fGMTOffsetPatternItems[i];
348 }
349 }
350
351 TimeZoneFormat&
operator =(const TimeZoneFormat & other)352 TimeZoneFormat::operator=(const TimeZoneFormat& other) {
353 if (this == &other) {
354 return *this;
355 }
356
357 delete fTimeZoneNames;
358 delete fTimeZoneGenericNames;
359 fTimeZoneGenericNames = NULL;
360
361 fLocale = other.fLocale;
362 uprv_memcpy(fTargetRegion, other.fTargetRegion, sizeof(fTargetRegion));
363
364 fTimeZoneNames = other.fTimeZoneNames->clone();
365 if (other.fTimeZoneGenericNames) {
366 fTimeZoneGenericNames = other.fTimeZoneGenericNames->clone();
367 }
368
369 fGMTPattern = other.fGMTPattern;
370 fGMTPatternPrefix = other.fGMTPatternPrefix;
371 fGMTPatternSuffix = other.fGMTPatternSuffix;
372
373 UErrorCode status = U_ZERO_ERROR;
374 for (int32_t i = 0; i <= UTZFMT_PAT_NEGATIVE_HMS; i++) {
375 fGMTOffsetPatterns[i] = other.fGMTOffsetPatterns[i];
376 delete fGMTOffsetPatternItems[i];
377 }
378 initGMTOffsetPatterns(status);
379 U_ASSERT(U_SUCCESS(status));
380
381 fGMTZeroFormat = other.fGMTZeroFormat;
382
383 uprv_memcpy(fGMTOffsetDigits, other.fGMTOffsetDigits, sizeof(fGMTOffsetDigits));
384
385 fDefParseOptionFlags = other.fDefParseOptionFlags;
386
387 return *this;
388 }
389
390
391 UBool
operator ==(const Format & other) const392 TimeZoneFormat::operator==(const Format& other) const {
393 TimeZoneFormat* tzfmt = (TimeZoneFormat*)&other;
394
395 UBool isEqual =
396 fLocale == tzfmt->fLocale
397 && fGMTPattern == tzfmt->fGMTPattern
398 && fGMTZeroFormat == tzfmt->fGMTZeroFormat
399 && *fTimeZoneNames == *tzfmt->fTimeZoneNames;
400
401 for (int32_t i = 0; i <= UTZFMT_PAT_NEGATIVE_HMS && isEqual; i++) {
402 isEqual = fGMTOffsetPatterns[i] == tzfmt->fGMTOffsetPatterns[i];
403 }
404 for (int32_t i = 0; i < 10 && isEqual; i++) {
405 isEqual = fGMTOffsetDigits[i] == tzfmt->fGMTOffsetDigits[i];
406 }
407 // TODO
408 // Check fTimeZoneGenericNames. For now,
409 // if fTimeZoneNames is same, fTimeZoneGenericNames should
410 // be also equivalent.
411 return isEqual;
412 }
413
414 Format*
clone() const415 TimeZoneFormat::clone() const {
416 return new TimeZoneFormat(*this);
417 }
418
419 TimeZoneFormat* U_EXPORT2
createInstance(const Locale & locale,UErrorCode & status)420 TimeZoneFormat::createInstance(const Locale& locale, UErrorCode& status) {
421 TimeZoneFormat* tzfmt = new TimeZoneFormat(locale, status);
422 if (U_SUCCESS(status)) {
423 return tzfmt;
424 }
425 delete tzfmt;
426 return NULL;
427 }
428
429 // ------------------------------------------------------------------
430 // Setter and Getter
431
432 const TimeZoneNames*
getTimeZoneNames() const433 TimeZoneFormat::getTimeZoneNames() const {
434 return (const TimeZoneNames*)fTimeZoneNames;
435 }
436
437 void
adoptTimeZoneNames(TimeZoneNames * tznames)438 TimeZoneFormat::adoptTimeZoneNames(TimeZoneNames *tznames) {
439 delete fTimeZoneNames;
440 fTimeZoneNames = tznames;
441
442 // TODO - We should also update fTimeZoneGenericNames
443 }
444
445 void
setTimeZoneNames(const TimeZoneNames & tznames)446 TimeZoneFormat::setTimeZoneNames(const TimeZoneNames &tznames) {
447 delete fTimeZoneNames;
448 fTimeZoneNames = tznames.clone();
449
450 // TODO - We should also update fTimeZoneGenericNames
451 }
452
453 void
setDefaultParseOptions(uint32_t flags)454 TimeZoneFormat::setDefaultParseOptions(uint32_t flags) {
455 fDefParseOptionFlags = flags;
456 }
457
458 uint32_t
getDefaultParseOptions(void) const459 TimeZoneFormat::getDefaultParseOptions(void) const {
460 return fDefParseOptionFlags;
461 }
462
463
464 UnicodeString&
getGMTPattern(UnicodeString & pattern) const465 TimeZoneFormat::getGMTPattern(UnicodeString& pattern) const {
466 return pattern.setTo(fGMTPattern);
467 }
468
469 void
setGMTPattern(const UnicodeString & pattern,UErrorCode & status)470 TimeZoneFormat::setGMTPattern(const UnicodeString& pattern, UErrorCode& status) {
471 initGMTPattern(pattern, status);
472 }
473
474 UnicodeString&
getGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type,UnicodeString & pattern) const475 TimeZoneFormat::getGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type, UnicodeString& pattern) const {
476 return pattern.setTo(fGMTOffsetPatterns[type]);
477 }
478
479 void
setGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type,const UnicodeString & pattern,UErrorCode & status)480 TimeZoneFormat::setGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type, const UnicodeString& pattern, UErrorCode& status) {
481 if (U_FAILURE(status)) {
482 return;
483 }
484 if (pattern == fGMTOffsetPatterns[type]) {
485 // No need to reset
486 return;
487 }
488
489 OffsetFields required = (type == UTZFMT_PAT_POSITIVE_HMS || type == UTZFMT_PAT_NEGATIVE_HMS) ? FIELDS_HMS : FIELDS_HM;
490
491 UVector* patternItems = parseOffsetPattern(pattern, required, status);
492 if (patternItems == NULL) {
493 return;
494 }
495
496 fGMTOffsetPatterns[type].setTo(pattern);
497 delete fGMTOffsetPatternItems[type];
498 fGMTOffsetPatternItems[type] = patternItems;
499 }
500
501 UnicodeString&
getGMTOffsetDigits(UnicodeString & digits) const502 TimeZoneFormat::getGMTOffsetDigits(UnicodeString& digits) const {
503 digits.remove();
504 for (int32_t i = 0; i < 10; i++) {
505 digits.append(fGMTOffsetDigits[i]);
506 }
507 return digits;
508 }
509
510 void
setGMTOffsetDigits(const UnicodeString & digits,UErrorCode & status)511 TimeZoneFormat::setGMTOffsetDigits(const UnicodeString& digits, UErrorCode& status) {
512 if (U_FAILURE(status)) {
513 return;
514 }
515 UChar32 digitArray[10];
516 if (!toCodePoints(digits, digitArray, 10)) {
517 status = U_ILLEGAL_ARGUMENT_ERROR;
518 return;
519 }
520 uprv_memcpy(fGMTOffsetDigits, digitArray, sizeof(UChar32)*10);
521 }
522
523 UnicodeString&
getGMTZeroFormat(UnicodeString & gmtZeroFormat) const524 TimeZoneFormat::getGMTZeroFormat(UnicodeString& gmtZeroFormat) const {
525 return gmtZeroFormat.setTo(fGMTZeroFormat);
526 }
527
528 void
setGMTZeroFormat(const UnicodeString & gmtZeroFormat,UErrorCode & status)529 TimeZoneFormat::setGMTZeroFormat(const UnicodeString& gmtZeroFormat, UErrorCode& status) {
530 if (U_SUCCESS(status)) {
531 if (gmtZeroFormat.isEmpty()) {
532 status = U_ILLEGAL_ARGUMENT_ERROR;
533 } else if (gmtZeroFormat != fGMTZeroFormat) {
534 fGMTZeroFormat.setTo(gmtZeroFormat);
535 }
536 }
537 }
538
539 // ------------------------------------------------------------------
540 // Format and Parse
541
542 UnicodeString&
format(UTimeZoneFormatStyle style,const TimeZone & tz,UDate date,UnicodeString & name,UTimeZoneFormatTimeType * timeType) const543 TimeZoneFormat::format(UTimeZoneFormatStyle style, const TimeZone& tz, UDate date,
544 UnicodeString& name, UTimeZoneFormatTimeType* timeType /* = NULL */) const {
545 if (timeType) {
546 *timeType = UTZFMT_TIME_TYPE_UNKNOWN;
547 }
548 switch (style) {
549 case UTZFMT_STYLE_GENERIC_LOCATION:
550 formatGeneric(tz, UTZGNM_LOCATION, date, name);
551 break;
552 case UTZFMT_STYLE_GENERIC_LONG:
553 formatGeneric(tz, UTZGNM_LONG, date, name);
554 break;
555 case UTZFMT_STYLE_GENERIC_SHORT:
556 formatGeneric(tz, UTZGNM_SHORT, date, name);
557 break;
558 case UTZFMT_STYLE_SPECIFIC_LONG:
559 formatSpecific(tz, UTZNM_LONG_STANDARD, UTZNM_LONG_DAYLIGHT, date, name, timeType);
560 break;
561 case UTZFMT_STYLE_SPECIFIC_SHORT:
562 formatSpecific(tz, UTZNM_SHORT_STANDARD, UTZNM_SHORT_DAYLIGHT, date, name, timeType);
563 break;
564 case UTZFMT_STYLE_RFC822:
565 case UTZFMT_STYLE_ISO8601:
566 case UTZFMT_STYLE_LOCALIZED_GMT:
567 // will be handled below
568 break;
569 }
570
571 if (name.isEmpty()) {
572 UErrorCode status = U_ZERO_ERROR;
573 int32_t rawOffset, dstOffset;
574 tz.getOffset(date, FALSE, rawOffset, dstOffset, status);
575 if (U_SUCCESS(status)) {
576 switch (style) {
577 case UTZFMT_STYLE_RFC822:
578 formatOffsetRFC822(rawOffset + dstOffset, name, status);
579 break;
580 case UTZFMT_STYLE_ISO8601:
581 formatOffsetISO8601(rawOffset + dstOffset, name, status);
582 break;
583 default:
584 formatOffsetLocalizedGMT(rawOffset + dstOffset, name, status);
585 break;
586 }
587 if (timeType) {
588 *timeType = (dstOffset != 0) ? UTZFMT_TIME_TYPE_DAYLIGHT : UTZFMT_TIME_TYPE_STANDARD;
589 }
590 }
591 U_ASSERT(U_SUCCESS(status));
592 }
593
594 return name;
595 }
596
597 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const598 TimeZoneFormat::format(const Formattable& obj, UnicodeString& appendTo,
599 FieldPosition& pos, UErrorCode& status) const {
600 if (U_FAILURE(status)) {
601 return appendTo;
602 }
603 UDate date = Calendar::getNow();
604 if (obj.getType() == Formattable::kObject) {
605 const UObject* formatObj = obj.getObject();
606 const TimeZone* tz = dynamic_cast<const TimeZone*>(formatObj);
607 if (tz == NULL) {
608 const Calendar* cal = dynamic_cast<const Calendar*>(formatObj);
609 if (cal != NULL) {
610 tz = &cal->getTimeZone();
611 date = cal->getTime(status);
612 }
613 }
614 if (tz != NULL) {
615 int32_t rawOffset, dstOffset;
616 tz->getOffset(date, FALSE, rawOffset, dstOffset, status);
617 UnicodeString result;
618 formatOffsetLocalizedGMT(rawOffset + dstOffset, result, status);
619 if (U_SUCCESS(status)) {
620 appendTo.append(result);
621 if (pos.getField() == UDAT_TIMEZONE_FIELD) {
622 pos.setBeginIndex(0);
623 pos.setEndIndex(result.length());
624 }
625 }
626 }
627 }
628 return appendTo;
629 }
630
631 TimeZone*
parse(UTimeZoneFormatStyle style,const UnicodeString & text,ParsePosition & pos,UTimeZoneFormatTimeType * timeType) const632 TimeZoneFormat::parse(UTimeZoneFormatStyle style, const UnicodeString& text, ParsePosition& pos,
633 UTimeZoneFormatTimeType* timeType /*= NULL*/) const {
634 return parse(style, text, pos, getDefaultParseOptions(), timeType);
635 }
636
637 TimeZone*
parse(UTimeZoneFormatStyle style,const UnicodeString & text,ParsePosition & pos,int32_t parseOptions,UTimeZoneFormatTimeType * timeType) const638 TimeZoneFormat::parse(UTimeZoneFormatStyle style, const UnicodeString& text, ParsePosition& pos,
639 int32_t parseOptions, UTimeZoneFormatTimeType* timeType /* = NULL */) const {
640 if (timeType) {
641 *timeType = UTZFMT_TIME_TYPE_UNKNOWN;
642 }
643
644 int32_t startIdx = pos.getIndex();
645 int32_t maxPos = text.length();
646 int32_t offset;
647
648 UBool fallbackLocalizedGMT = FALSE;
649 if (style == UTZFMT_STYLE_SPECIFIC_LONG || style == UTZFMT_STYLE_SPECIFIC_SHORT
650 || style == UTZFMT_STYLE_GENERIC_LONG || style == UTZFMT_STYLE_GENERIC_SHORT || style == UTZFMT_STYLE_GENERIC_LOCATION) {
651 // above styles may use localized gmt format as fallback
652 fallbackLocalizedGMT = TRUE;
653 }
654
655 int32_t evaluated = 0;
656 ParsePosition tmpPos(startIdx);
657
658 int32_t parsedOffset = UNKNOWN_OFFSET; // stores successfully parsed offset for later use
659 int32_t parsedPos = -1; // stores successfully parsed offset position for later use
660
661 // Try localized GMT format first if necessary
662 if (fallbackLocalizedGMT) {
663 UBool hasDigitOffset = FALSE;
664 offset = parseOffsetLocalizedGMT(text, tmpPos, &hasDigitOffset);
665 if (tmpPos.getErrorIndex() == -1) {
666 // Even when the input text was successfully parsed as a localized GMT format text,
667 // we may still need to evaluate the specified style if -
668 // 1) GMT zero format was used, and
669 // 2) The input text was not completely processed
670 if (tmpPos.getIndex() == maxPos || hasDigitOffset) {
671 pos.setIndex(tmpPos.getIndex());
672 return createTimeZoneForOffset(offset);
673 }
674 parsedOffset = offset;
675 parsedPos = tmpPos.getIndex();
676 }
677 evaluated |= STYLE_FLAG(UTZFMT_STYLE_LOCALIZED_GMT);
678
679 tmpPos.setIndex(startIdx);
680 tmpPos.setErrorIndex(-1);
681 }
682
683 UErrorCode status = U_ZERO_ERROR;
684 UnicodeString tzID;
685 UTimeZoneFormatTimeType parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
686
687 // Try the specified style
688 switch (style) {
689 case UTZFMT_STYLE_RFC822:
690 {
691 offset = parseOffsetRFC822(text, tmpPos);
692 if (tmpPos.getErrorIndex() == -1) {
693 pos.setIndex(tmpPos.getIndex());
694 return createTimeZoneForOffset(offset);
695 }
696 }
697 break;
698
699 case UTZFMT_STYLE_LOCALIZED_GMT:
700 {
701 offset = parseOffsetLocalizedGMT(text, tmpPos);
702 if (tmpPos.getErrorIndex() == -1) {
703 pos.setIndex(tmpPos.getIndex());
704 return createTimeZoneForOffset(offset);
705 }
706 }
707 break;
708
709 case UTZFMT_STYLE_ISO8601:
710 {
711 offset = parseOffsetISO8601(text, tmpPos);
712 if (tmpPos.getErrorIndex() == -1) {
713 pos.setIndex(tmpPos.getIndex());
714 return createTimeZoneForOffset(offset);
715 }
716 // Note: ISO 8601 parser also support basic format (without ':'),
717 // which is same with RFC 822 format.
718 evaluated |= STYLE_FLAG(UTZFMT_STYLE_RFC822);
719 }
720 break;
721
722 case UTZFMT_STYLE_SPECIFIC_LONG:
723 case UTZFMT_STYLE_SPECIFIC_SHORT:
724 {
725 // Specific styles
726 int32_t nameTypes = 0;
727 if (style == UTZFMT_STYLE_SPECIFIC_LONG) {
728 nameTypes = (UTZNM_LONG_STANDARD | UTZNM_LONG_DAYLIGHT);
729 } else {
730 U_ASSERT(style == UTZFMT_STYLE_SPECIFIC_SHORT);
731 nameTypes = (UTZNM_SHORT_STANDARD | UTZNM_SHORT_DAYLIGHT);
732 }
733 LocalPointer<TimeZoneNames::MatchInfoCollection> specificMatches(fTimeZoneNames->find(text, startIdx, nameTypes, status));
734 if (U_FAILURE(status)) {
735 pos.setErrorIndex(startIdx);
736 return NULL;
737 }
738 if (!specificMatches.isNull()) {
739 int32_t matchIdx = -1;
740 int32_t matchPos = -1;
741 for (int32_t i = 0; i < specificMatches->size(); i++) {
742 matchPos = startIdx + specificMatches->getMatchLengthAt(i);
743 if (matchPos > parsedPos) {
744 matchIdx = i;
745 parsedPos = matchPos;
746 }
747 }
748 if (matchIdx >= 0) {
749 if (timeType) {
750 *timeType = getTimeType(specificMatches->getNameTypeAt(matchIdx));
751 }
752 pos.setIndex(matchPos);
753 getTimeZoneID(specificMatches.getAlias(), matchIdx, tzID);
754 U_ASSERT(!tzID.isEmpty());
755 return TimeZone::createTimeZone(tzID);
756 }
757 }
758 }
759 break;
760
761 case UTZFMT_STYLE_GENERIC_LONG:
762 case UTZFMT_STYLE_GENERIC_SHORT:
763 case UTZFMT_STYLE_GENERIC_LOCATION:
764 {
765 int32_t genericNameTypes = 0;
766 switch (style) {
767 case UTZFMT_STYLE_GENERIC_LOCATION:
768 genericNameTypes = UTZGNM_LOCATION;
769 break;
770
771 case UTZFMT_STYLE_GENERIC_LONG:
772 genericNameTypes = UTZGNM_LONG | UTZGNM_LOCATION;
773 break;
774
775 case UTZFMT_STYLE_GENERIC_SHORT:
776 genericNameTypes = UTZGNM_SHORT | UTZGNM_LOCATION;
777 break;
778
779 default:
780 U_ASSERT(FALSE);
781 }
782
783 int32_t len = 0;
784 const TimeZoneGenericNames *gnames = getTimeZoneGenericNames(status);
785 if (U_SUCCESS(status)) {
786 len = gnames->findBestMatch(text, startIdx, genericNameTypes, tzID, parsedTimeType, status);
787 }
788 if (U_FAILURE(status)) {
789 pos.setErrorIndex(startIdx);
790 return NULL;
791 }
792 if (len > 0) {
793 // Found a match
794 if (timeType) {
795 *timeType = parsedTimeType;
796 }
797 pos.setIndex(startIdx + len);
798 U_ASSERT(!tzID.isEmpty());
799 return TimeZone::createTimeZone(tzID);
800 }
801 }
802 break;
803 }
804 evaluated |= STYLE_FLAG(style);
805
806
807 if (parsedPos > startIdx) {
808 // When the specified style is one of SPECIFIC_XXX or GENERIC_XXX, we tried to parse the input
809 // as localized GMT format earlier. If parsedOffset is positive, it means it was successfully
810 // parsed as localized GMT format, but offset digits were not detected (more specifically, GMT
811 // zero format). Then, it tried to find a match within the set of display names, but could not
812 // find a match. At this point, we can safely assume the input text contains the localized
813 // GMT format.
814 U_ASSERT(parsedOffset != UNKNOWN_OFFSET);
815 pos.setIndex(parsedPos);
816 return createTimeZoneForOffset(parsedOffset);
817 }
818
819 // Failed to parse the input text as the time zone format in the specified style.
820 // Check the longest match among other styles below.
821 U_ASSERT(parsedPos < 0);
822 U_ASSERT(parsedOffset == UNKNOWN_OFFSET);
823 tmpPos.setIndex(startIdx);
824 tmpPos.setErrorIndex(-1);
825
826 // ISO 8601
827 if ((evaluated & STYLE_FLAG(UTZFMT_STYLE_ISO8601)) == 0) {
828 UBool hasDigitOffset = FALSE;
829 offset = parseOffsetISO8601(text, tmpPos, FALSE, &hasDigitOffset);
830 if (tmpPos.getErrorIndex() == -1) {
831 if (tmpPos.getIndex() == maxPos || hasDigitOffset) {
832 pos.setIndex(tmpPos.getIndex());
833 return createTimeZoneForOffset(offset);
834 }
835 // Note: When ISO 8601 format contains offset digits, it should not
836 // collide with other formats (except RFC 822, which is compatible with
837 // ISO 8601 basic format). However, ISO 8601 UTC format "Z" (single letter)
838 // may collide with other names. In this case, we need to evaluate other
839 // names.
840 parsedOffset = offset;
841 parsedPos = tmpPos.getIndex();
842 U_ASSERT(parsedPos == startIdx + 1); // only when "Z" is used
843 }
844 tmpPos.setIndex(startIdx);
845 tmpPos.setErrorIndex(-1);
846 }
847
848 // RFC 822
849 // Note: ISO 8601 parser supports RFC 822 format. So we do not need to parse
850 // it as RFC 822 here. This might be changed in future when we support
851 // strict format option for ISO 8601 or RFC 822.
852
853 //if ((evaluated & STYLE_FLAG(UTZFMT_STYLE_RFC822)) == 0) {
854 // offset = parseOffsetRFC822(text, tmpPos);
855 // if (tmpPos.getErrorIndex() == -1) {
856 // pos.setIndex(tmpPos.getIndex());
857 // return createTimeZoneForOffset(offset);
858 // }
859 // tmpPos.setIndex(startIdx);
860 // tmpPos.setErrorIndex(-1);
861 //}
862
863 // Localized GMT format
864 if ((evaluated & STYLE_FLAG(UTZFMT_STYLE_LOCALIZED_GMT)) == 0) {
865 UBool hasDigitOffset = FALSE;
866 offset = parseOffsetLocalizedGMT(text, tmpPos, &hasDigitOffset);
867 if (tmpPos.getErrorIndex() == -1) {
868 if (tmpPos.getIndex() == maxPos || hasDigitOffset) {
869 pos.setIndex(tmpPos.getIndex());
870 return createTimeZoneForOffset(offset);
871 }
872 // Evaluate other names - see the comment earlier in this method.
873 parsedOffset = offset;
874 parsedPos = tmpPos.getIndex();
875 }
876 }
877
878 // When ParseOption.ALL_STYLES is available, we also try to look all possible display names.
879 // For example, when style is GENERIC_LONG, "EST" (SPECIFIC_SHORT) is never
880 // used for America/New_York. With parseAllStyles true, this code parses "EST"
881 // as America/New_York.
882
883 // Note: Adding all possible names into the trie used by the implementation is quite heavy operation,
884 // which we want to avoid normally (note that we cache the trie, so this is applicable to the
885 // first time only as long as the cache does not expire).
886 if (parseOptions & UTZFMT_PARSE_OPTION_ALL_STYLES) {
887 // Try all specific names first
888 LocalPointer<TimeZoneNames::MatchInfoCollection> spAllMatches(fTimeZoneNames->find(text, startIdx, ALL_SPECIFIC_NAME_TYPES, status));
889 if (U_FAILURE(status)) {
890 pos.setErrorIndex(startIdx);
891 return NULL;
892 }
893 int32_t spMatchIdx = -1;
894 if (!spAllMatches.isNull()) {
895 int32_t matchPos = -1;
896 for (int32_t i = 0; i < spAllMatches->size(); i++) {
897 matchPos = startIdx + spAllMatches->getMatchLengthAt(i);
898 if (matchPos > parsedPos) {
899 spMatchIdx = i;
900 parsedPos = matchPos;
901 }
902 }
903 }
904 int32_t genMatchLen = -1;
905 if (parsedPos < maxPos) {
906 const TimeZoneGenericNames *gnames = getTimeZoneGenericNames(status);
907 if (U_SUCCESS(status)) {
908 genMatchLen = gnames->findBestMatch(text, startIdx, ALL_GENERIC_NAME_TYPES, tzID, parsedTimeType, status);
909 }
910 if (U_FAILURE(status)) {
911 pos.setErrorIndex(startIdx);
912 return NULL;
913 }
914 }
915 // Pick up better match
916 if (startIdx + genMatchLen > parsedPos) {
917 // use generic name match
918 parsedPos = startIdx + genMatchLen;
919 if (timeType) {
920 *timeType = parsedTimeType;
921 }
922 pos.setIndex(parsedPos);
923 U_ASSERT(!tzID.isEmpty());
924 return TimeZone::createTimeZone(tzID);
925 } else if (spMatchIdx >= 0) {
926 // use specific name match
927 if (timeType) {
928 *timeType = getTimeType(spAllMatches->getNameTypeAt(spMatchIdx));
929 }
930 pos.setIndex(parsedPos);
931 getTimeZoneID(spAllMatches.getAlias(), spMatchIdx, tzID);
932 U_ASSERT(!tzID.isEmpty());
933 return TimeZone::createTimeZone(tzID);
934 }
935 }
936
937 if (parsedPos > startIdx) {
938 // Parsed successfully as one of 'offset' format
939 U_ASSERT(parsedOffset != UNKNOWN_OFFSET);
940 pos.setIndex(parsedPos);
941 return createTimeZoneForOffset(parsedOffset);
942 }
943
944 pos.setErrorIndex(startIdx);
945 return NULL;
946 }
947
948 void
parseObject(const UnicodeString & source,Formattable & result,ParsePosition & parse_pos) const949 TimeZoneFormat::parseObject(const UnicodeString& source, Formattable& result,
950 ParsePosition& parse_pos) const {
951 result.adoptObject(parse(UTZFMT_STYLE_GENERIC_LOCATION, source, parse_pos, UTZFMT_PARSE_OPTION_ALL_STYLES));
952 }
953
954
955 // ------------------------------------------------------------------
956 // Private zone name format/parse implementation
957
958 UnicodeString&
formatGeneric(const TimeZone & tz,int32_t genType,UDate date,UnicodeString & name) const959 TimeZoneFormat::formatGeneric(const TimeZone& tz, int32_t genType, UDate date, UnicodeString& name) const {
960 UErrorCode status = U_ZERO_ERROR;
961 const TimeZoneGenericNames* gnames = getTimeZoneGenericNames(status);
962 if (U_FAILURE(status)) {
963 name.setToBogus();
964 return name;
965 }
966
967 if (genType == UTZGNM_LOCATION) {
968 const UChar* canonicalID = ZoneMeta::getCanonicalCLDRID(tz);
969 if (canonicalID == NULL) {
970 name.setToBogus();
971 return name;
972 }
973 return gnames->getGenericLocationName(UnicodeString(canonicalID), name);
974 }
975 return gnames->getDisplayName(tz, (UTimeZoneGenericNameType)genType, date, name);
976 }
977
978 UnicodeString&
formatSpecific(const TimeZone & tz,UTimeZoneNameType stdType,UTimeZoneNameType dstType,UDate date,UnicodeString & name,UTimeZoneFormatTimeType * timeType) const979 TimeZoneFormat::formatSpecific(const TimeZone& tz, UTimeZoneNameType stdType, UTimeZoneNameType dstType,
980 UDate date, UnicodeString& name, UTimeZoneFormatTimeType *timeType) const {
981 if (fTimeZoneNames == NULL) {
982 name.setToBogus();
983 return name;
984 }
985
986 UErrorCode status = U_ZERO_ERROR;
987 UBool isDaylight = tz.inDaylightTime(date, status);
988 const UChar* canonicalID = ZoneMeta::getCanonicalCLDRID(tz);
989
990 if (U_FAILURE(status) || canonicalID == NULL) {
991 name.setToBogus();
992 return name;
993 }
994
995 if (isDaylight) {
996 fTimeZoneNames->getDisplayName(UnicodeString(canonicalID), dstType, date, name);
997 } else {
998 fTimeZoneNames->getDisplayName(UnicodeString(canonicalID), stdType, date, name);
999 }
1000
1001 if (timeType && !name.isEmpty()) {
1002 *timeType = isDaylight ? UTZFMT_TIME_TYPE_DAYLIGHT : UTZFMT_TIME_TYPE_STANDARD;
1003 }
1004 return name;
1005 }
1006
1007 static UMutex gLock = U_MUTEX_INITIALIZER;
1008
1009 const TimeZoneGenericNames*
getTimeZoneGenericNames(UErrorCode & status) const1010 TimeZoneFormat::getTimeZoneGenericNames(UErrorCode& status) const {
1011 if (U_FAILURE(status)) {
1012 return NULL;
1013 }
1014
1015 UBool create;
1016 UMTX_CHECK(&gZoneMetaLock, (fTimeZoneGenericNames == NULL), create);
1017 if (create) {
1018 TimeZoneFormat *nonConstThis = const_cast<TimeZoneFormat *>(this);
1019 umtx_lock(&gLock);
1020 {
1021 if (fTimeZoneGenericNames == NULL) {
1022 nonConstThis->fTimeZoneGenericNames = TimeZoneGenericNames::createInstance(fLocale, status);
1023 }
1024 }
1025 umtx_unlock(&gLock);
1026 }
1027
1028 return fTimeZoneGenericNames;
1029 }
1030
1031 // ------------------------------------------------------------------
1032 // Zone offset format and parse
1033
1034 UnicodeString&
formatOffsetRFC822(int32_t offset,UnicodeString & result,UErrorCode & status) const1035 TimeZoneFormat::formatOffsetRFC822(int32_t offset, UnicodeString& result, UErrorCode& status) const {
1036 if (U_FAILURE(status)) {
1037 result.setToBogus();
1038 return result;
1039 }
1040 if (offset <= -MAX_OFFSET || offset >= MAX_OFFSET) {
1041 result.setToBogus();
1042 status = U_ILLEGAL_ARGUMENT_ERROR;
1043 return result;
1044 }
1045
1046 // Note: FIELDS_HMS as maxFields is an ICU extension. RFC822 specification
1047 // defines exactly 4 digits for the offset field in HHss format.
1048 return formatOffsetWithAsciiDigits(offset, 0, FIELDS_HM, FIELDS_HMS, result);
1049 }
1050
1051 UnicodeString&
formatOffsetISO8601(int32_t offset,UnicodeString & result,UErrorCode & status) const1052 TimeZoneFormat::formatOffsetISO8601(int32_t offset, UnicodeString& result, UErrorCode& status) const {
1053 if (U_FAILURE(status)) {
1054 result.setToBogus();
1055 return result;
1056 }
1057 if (offset <= -MAX_OFFSET || offset >= MAX_OFFSET) {
1058 result.setToBogus();
1059 status = U_ILLEGAL_ARGUMENT_ERROR;
1060 return result;
1061 }
1062
1063 if (offset == 0) {
1064 result.setTo(ISO8601_UTC);
1065 return result;
1066 }
1067 return formatOffsetWithAsciiDigits(offset, ISO8601_SEP, FIELDS_HM, FIELDS_HMS, result);
1068 }
1069
1070 UnicodeString&
formatOffsetLocalizedGMT(int32_t offset,UnicodeString & result,UErrorCode & status) const1071 TimeZoneFormat::formatOffsetLocalizedGMT(int32_t offset, UnicodeString& result, UErrorCode& status) const {
1072 if (U_FAILURE(status)) {
1073 result.setToBogus();
1074 return result;
1075 }
1076 if (offset <= -MAX_OFFSET || offset >= MAX_OFFSET) {
1077 result.setToBogus();
1078 status = U_ILLEGAL_ARGUMENT_ERROR;
1079 return result;
1080 }
1081
1082 if (offset == 0) {
1083 result.setTo(fGMTZeroFormat);
1084 return result;
1085 }
1086
1087 UBool positive = TRUE;
1088 if (offset < 0) {
1089 offset = -offset;
1090 positive = FALSE;
1091 }
1092
1093 int32_t offsetH = offset / MILLIS_PER_HOUR;
1094 offset = offset % MILLIS_PER_HOUR;
1095 int32_t offsetM = offset / MILLIS_PER_MINUTE;
1096 offset = offset % MILLIS_PER_MINUTE;
1097 int32_t offsetS = offset / MILLIS_PER_SECOND;
1098
1099 U_ASSERT(offsetH <= MAX_OFFSET_HOUR && offsetM <= MAX_OFFSET_MINUTE && offsetS <= MAX_OFFSET_SECOND);
1100
1101 const UVector* offsetPatternItems = NULL;
1102 if (positive) {
1103 offsetPatternItems = (offsetS == 0) ?
1104 fGMTOffsetPatternItems[UTZFMT_PAT_POSITIVE_HM] :
1105 fGMTOffsetPatternItems[UTZFMT_PAT_POSITIVE_HMS];
1106 } else {
1107 offsetPatternItems = (offsetS == 0) ?
1108 fGMTOffsetPatternItems[UTZFMT_PAT_NEGATIVE_HM] :
1109 fGMTOffsetPatternItems[UTZFMT_PAT_NEGATIVE_HMS];
1110 }
1111
1112 U_ASSERT(offsetPatternItems != NULL);
1113
1114 // Building the GMT format string
1115 result.setTo(fGMTPatternPrefix);
1116
1117 for (int32_t i = 0; i < offsetPatternItems->size(); i++) {
1118 const GMTOffsetField* item = (GMTOffsetField*)offsetPatternItems->elementAt(i);
1119 GMTOffsetField::FieldType type = item->getType();
1120
1121 switch (type) {
1122 case GMTOffsetField::TEXT:
1123 result.append(item->getPatternText(), -1);
1124 break;
1125
1126 case GMTOffsetField::HOUR:
1127 appendOffsetDigits(result, offsetH, item->getWidth());
1128 break;
1129
1130 case GMTOffsetField::MINUTE:
1131 appendOffsetDigits(result, offsetM, item->getWidth());
1132 break;
1133
1134 case GMTOffsetField::SECOND:
1135 appendOffsetDigits(result, offsetS, item->getWidth());
1136 break;
1137 }
1138 }
1139
1140 result.append(fGMTPatternSuffix);
1141 return result;
1142 }
1143
1144 int32_t
parseOffsetRFC822(const UnicodeString & text,ParsePosition & pos) const1145 TimeZoneFormat::parseOffsetRFC822(const UnicodeString& text, ParsePosition& pos) const {
1146 int32_t start = pos.getIndex();
1147 if (start >= text.length()) {
1148 pos.setErrorIndex(start);
1149 return 0;
1150 }
1151
1152 int32_t sign = 1;
1153 UChar signChar = text.charAt(start);
1154 if (signChar == PLUS) {
1155 sign = 1;
1156 } else if (signChar == MINUS) {
1157 sign = -1;
1158 } else {
1159 // Not an RFC822 offset string
1160 pos.setErrorIndex(start);
1161 return 0;
1162 }
1163
1164 // Parse digits
1165 pos.setIndex(start + 1);
1166 int32_t offset = parseAbuttingAsciiOffsetFields(text, pos, FIELDS_H, FIELDS_HMS, false);
1167
1168 if (pos.getErrorIndex() != -1) {
1169 pos.setIndex(start); // reset
1170 pos.setErrorIndex(start);
1171 return 0;
1172 }
1173
1174 return sign * offset;
1175 }
1176
1177 int32_t
parseOffsetISO8601(const UnicodeString & text,ParsePosition & pos) const1178 TimeZoneFormat::parseOffsetISO8601(const UnicodeString& text, ParsePosition& pos) const {
1179 return parseOffsetISO8601(text, pos, FALSE);
1180 }
1181
1182 int32_t
parseOffsetLocalizedGMT(const UnicodeString & text,ParsePosition & pos) const1183 TimeZoneFormat::parseOffsetLocalizedGMT(const UnicodeString& text, ParsePosition& pos) const {
1184 return parseOffsetLocalizedGMT(text, pos, NULL);
1185 }
1186
1187
1188
1189 // ------------------------------------------------------------------
1190 // Private zone offset format/parse implementation
1191
1192 int32_t
parseOffsetISO8601(const UnicodeString & text,ParsePosition & pos,UBool extendedOnly,UBool * hasDigitOffset) const1193 TimeZoneFormat::parseOffsetISO8601(const UnicodeString& text, ParsePosition& pos, UBool extendedOnly, UBool* hasDigitOffset /* = NULL */) const {
1194 if (hasDigitOffset) {
1195 *hasDigitOffset = FALSE;
1196 }
1197 int32_t start = pos.getIndex();
1198 if (start >= text.length()) {
1199 pos.setErrorIndex(start);
1200 return 0;
1201 }
1202
1203 UChar firstChar = text.charAt(start);
1204 if (firstChar == ISO8601_UTC || firstChar == (UChar)(ISO8601_UTC + 0x20)) {
1205 // "Z" (or "z") - indicates UTC
1206 pos.setIndex(start + 1);
1207 return 0;
1208 }
1209
1210 int32_t sign = 1;
1211 if (firstChar == PLUS) {
1212 sign = 1;
1213 } else if (firstChar == MINUS) {
1214 sign = -1;
1215 } else {
1216 // Not an ISO 8601 offset string
1217 pos.setErrorIndex(start);
1218 return 0;
1219 }
1220 ParsePosition posOffset(start + 1);
1221 int32_t offset = parseAsciiOffsetFields(text, posOffset, ISO8601_SEP, FIELDS_H, FIELDS_HMS, FALSE);
1222 if (posOffset.getErrorIndex() == -1 && !extendedOnly && (posOffset.getIndex() - start <= 3)) {
1223 // If the text is successfully parsed as extended format with the options above, it can be also parsed
1224 // as basic format. For example, "0230" can be parsed as offset 2:00 (only first digits are valid for
1225 // extended format), but it can be parsed as offset 2:30 with basic format. We use longer result.
1226 ParsePosition posBasic(start + 1);
1227 int32_t tmpOffset = parseAbuttingAsciiOffsetFields(text, posBasic, FIELDS_H, FIELDS_HMS, FALSE);
1228 if (posBasic.getErrorIndex() == -1 && posBasic.getIndex() > posOffset.getIndex()) {
1229 offset = tmpOffset;
1230 posOffset.setIndex(posBasic.getIndex());
1231 }
1232 }
1233
1234 if (posOffset.getErrorIndex() != -1) {
1235 pos.setErrorIndex(start);
1236 return 0;
1237 }
1238
1239 pos.setIndex(posOffset.getIndex());
1240 if (hasDigitOffset) {
1241 *hasDigitOffset = TRUE;
1242 }
1243 return sign * offset;
1244 }
1245
1246 int32_t
parseOffsetLocalizedGMT(const UnicodeString & text,ParsePosition & pos,UBool * hasDigitOffset) const1247 TimeZoneFormat::parseOffsetLocalizedGMT(const UnicodeString& text, ParsePosition& pos, UBool* hasDigitOffset) const {
1248 int32_t start = pos.getIndex();
1249 int32_t idx = start;
1250 UBool parsed = FALSE;
1251 int32_t offset = 0;
1252
1253 if (hasDigitOffset) {
1254 *hasDigitOffset = FALSE;
1255 }
1256
1257 do {
1258 // Prefix part
1259 int32_t len = fGMTPatternPrefix.length();
1260 if (len > 0 && text.caseCompare(idx, len, fGMTPatternPrefix, 0) != 0) {
1261 // prefix match failed
1262 break;
1263 }
1264 idx += len;
1265
1266 // Offset part
1267 offset = parseOffsetFields(text, idx, FALSE, len);
1268 if (len == 0) {
1269 // offset field match failed
1270 break;
1271 }
1272 idx += len;
1273
1274 // Suffix part
1275 len = fGMTPatternSuffix.length();
1276 if (len > 0 && text.caseCompare(idx, len, fGMTPatternSuffix, 0) != 0) {
1277 // no suffix match
1278 break;
1279 }
1280 idx += len;
1281 parsed = TRUE;
1282
1283 } while (false);
1284
1285 if (parsed) {
1286 if (hasDigitOffset) {
1287 *hasDigitOffset = TRUE;
1288 }
1289 pos.setIndex(idx);
1290 return offset;
1291 }
1292
1293 // Try the default patterns
1294 int32_t parsedLength = 0;
1295 offset = parseOffsetDefaultLocalizedGMT(text, start, parsedLength);
1296 if (parsedLength > 0) {
1297 if (hasDigitOffset) {
1298 *hasDigitOffset = TRUE;
1299 }
1300 pos.setIndex(start + parsedLength);
1301 return offset;
1302 }
1303
1304 // Check if this is a GMT zero format
1305 if (text.caseCompare(start, fGMTZeroFormat.length(), fGMTZeroFormat, 0) == 0) {
1306 pos.setIndex(start + fGMTZeroFormat.length());
1307 return 0;
1308 }
1309
1310 // Check if this is a default GMT zero format
1311 for (int32_t i = 0; ALT_GMT_STRINGS[i][0] != 0; i++) {
1312 const UChar* defGMTZero = ALT_GMT_STRINGS[i];
1313 int32_t defGMTZeroLen = u_strlen(defGMTZero);
1314 if (text.caseCompare(start, defGMTZeroLen, defGMTZero, 0) == 0) {
1315 pos.setIndex(start + defGMTZeroLen);
1316 return 0;
1317 }
1318 }
1319
1320 // Nothing matched
1321 pos.setErrorIndex(start);
1322 return 0;
1323 }
1324
1325 int32_t
parseOffsetFields(const UnicodeString & text,int32_t start,UBool minimumHourWidth,int32_t & parsedLen) const1326 TimeZoneFormat::parseOffsetFields(const UnicodeString& text, int32_t start, UBool minimumHourWidth, int32_t& parsedLen) const {
1327 int32_t offset = 0;
1328 UBool sawVarHourAndAbuttingField = FALSE;
1329
1330 parsedLen = 0;
1331
1332 for (int32_t patidx = 0; PARSE_GMT_OFFSET_TYPES[patidx] >= 0; patidx++) {
1333 int32_t gmtPatType = PARSE_GMT_OFFSET_TYPES[patidx];
1334 int32_t offsetH = 0, offsetM = 0, offsetS = 0;
1335 int32_t idx = start;
1336 UVector* items = fGMTOffsetPatternItems[gmtPatType];
1337 U_ASSERT(items != NULL);
1338
1339 UBool failed = FALSE;
1340 for (int32_t i = 0; i < items->size(); i++) {
1341 int32_t tmpParsedLen = 0;
1342 const GMTOffsetField* field = (const GMTOffsetField*)items->elementAt(i);
1343 GMTOffsetField::FieldType fieldType = field->getType();
1344 if (fieldType == GMTOffsetField::TEXT) {
1345 const UChar* patStr = field->getPatternText();
1346 tmpParsedLen = u_strlen(patStr);
1347 if (text.caseCompare(idx, tmpParsedLen, patStr, 0) != 0) {
1348 failed = TRUE;
1349 break;
1350 }
1351 idx += tmpParsedLen;
1352 } else {
1353 if (fieldType == GMTOffsetField::HOUR) {
1354 uint8_t minDigits = 1;
1355 uint8_t maxDigits = minimumHourWidth ? 1 : 2;
1356 if (!minimumHourWidth && !sawVarHourAndAbuttingField) {
1357 if (i + 1 < items->size()) {
1358 const GMTOffsetField* nextField = (const GMTOffsetField*)items->elementAt(i + 1);
1359 if (nextField->getType() != GMTOffsetField::TEXT) {
1360 sawVarHourAndAbuttingField = true;
1361 }
1362 }
1363 }
1364 offsetH = parseOffsetFieldWithLocalizedDigits(text, idx, minDigits, maxDigits, 0, MAX_OFFSET_HOUR, tmpParsedLen);
1365 } else if (fieldType == GMTOffsetField::MINUTE) {
1366 offsetM = parseOffsetFieldWithLocalizedDigits(text, idx, 2, 2, 0, MAX_OFFSET_MINUTE, tmpParsedLen);
1367 } else if (fieldType == GMTOffsetField::SECOND) {
1368 offsetS = parseOffsetFieldWithLocalizedDigits(text, idx, 2, 2, 0, MAX_OFFSET_SECOND, tmpParsedLen);
1369 }
1370
1371 if (tmpParsedLen == 0) {
1372 failed = TRUE;
1373 break;
1374 }
1375 idx += tmpParsedLen;
1376 }
1377 }
1378 if (!failed) {
1379 int32_t sign = (gmtPatType == UTZFMT_PAT_POSITIVE_HM || gmtPatType == UTZFMT_PAT_POSITIVE_HMS) ? 1 : -1;
1380 offset = ((((offsetH * 60) + offsetM) * 60) + offsetS) * 1000 * sign;
1381 parsedLen = idx - start;
1382 break;
1383 }
1384 }
1385
1386 if (parsedLen == 0 && sawVarHourAndAbuttingField && !minimumHourWidth) {
1387 // When hour field is variable width and another non-literal pattern
1388 // field follows, the parse loop above might eat up the digit from
1389 // the abutting field. For example, with pattern "-Hmm" and input "-100",
1390 // the hour is parsed as -10 and fails to parse minute field.
1391 //
1392 // If this is the case, try parsing the text one more time with the arg
1393 // minimumHourWidth = true
1394 //
1395 // Note: This fallback is not applicable when quitAtHourField is true, because
1396 // the option is designed for supporting the case like "GMT+5". In this case,
1397 // we should get better result for parsing hour digits as much as possible.
1398
1399 return parseOffsetFields(text, start, true, parsedLen);
1400 }
1401
1402 return offset;
1403 }
1404
1405 int32_t
parseAbuttingOffsetFields(const UnicodeString & text,int32_t start,int32_t & parsedLen) const1406 TimeZoneFormat::parseAbuttingOffsetFields(const UnicodeString& text, int32_t start, int32_t& parsedLen) const {
1407 int32_t digits[MAX_OFFSET_DIGITS];
1408 int32_t parsed[MAX_OFFSET_DIGITS]; // accumulative offsets
1409
1410 // Parse digits into int[]
1411 int32_t idx = start;
1412 int32_t len = 0;
1413 int32_t numDigits = 0;
1414 for (int32_t i = 0; i < MAX_OFFSET_DIGITS; i++) {
1415 digits[i] = parseSingleLocalizedDigit(text, idx, len);
1416 if (digits[i] < 0) {
1417 break;
1418 }
1419 idx += len;
1420 parsed[i] = idx - start;
1421 numDigits++;
1422 }
1423
1424 if (numDigits == 0) {
1425 parsedLen = 0;
1426 return 0;
1427 }
1428
1429 int32_t offset = 0;
1430 while (numDigits > 0) {
1431 int32_t hour = 0;
1432 int32_t min = 0;
1433 int32_t sec = 0;
1434
1435 U_ASSERT(numDigits > 0 && numDigits <= MAX_OFFSET_DIGITS);
1436 switch (numDigits) {
1437 case 1: // H
1438 hour = digits[0];
1439 break;
1440 case 2: // HH
1441 hour = digits[0] * 10 + digits[1];
1442 break;
1443 case 3: // Hmm
1444 hour = digits[0];
1445 min = digits[1] * 10 + digits[2];
1446 break;
1447 case 4: // HHmm
1448 hour = digits[0] * 10 + digits[1];
1449 min = digits[2] * 10 + digits[3];
1450 break;
1451 case 5: // Hmmss
1452 hour = digits[0];
1453 min = digits[1] * 10 + digits[2];
1454 sec = digits[3] * 10 + digits[4];
1455 break;
1456 case 6: // HHmmss
1457 hour = digits[0] * 10 + digits[1];
1458 min = digits[2] * 10 + digits[3];
1459 sec = digits[4] * 10 + digits[5];
1460 break;
1461 }
1462 if (hour <= MAX_OFFSET_HOUR && min <= MAX_OFFSET_MINUTE && sec <= MAX_OFFSET_SECOND) {
1463 // found a valid combination
1464 offset = hour * MILLIS_PER_HOUR + min * MILLIS_PER_MINUTE + sec * MILLIS_PER_SECOND;
1465 parsedLen = parsed[numDigits - 1];
1466 break;
1467 }
1468 numDigits--;
1469 }
1470 return offset;
1471 }
1472
1473 int32_t
parseOffsetDefaultLocalizedGMT(const UnicodeString & text,int start,int32_t & parsedLen) const1474 TimeZoneFormat::parseOffsetDefaultLocalizedGMT(const UnicodeString& text, int start, int32_t& parsedLen) const {
1475 int32_t idx = start;
1476 int32_t offset = 0;
1477 int32_t parsed = 0;
1478
1479 do {
1480 // check global default GMT alternatives
1481 int32_t gmtLen = 0;
1482
1483 for (int32_t i = 0; ALT_GMT_STRINGS[i][0] != 0; i++) {
1484 const UChar* gmt = ALT_GMT_STRINGS[i];
1485 int32_t len = u_strlen(gmt);
1486 if (text.caseCompare(start, len, gmt, 0) == 0) {
1487 gmtLen = len;
1488 break;
1489 }
1490 }
1491 if (gmtLen == 0) {
1492 break;
1493 }
1494 idx += gmtLen;
1495
1496 // offset needs a sign char and a digit at minimum
1497 if (idx + 1 >= text.length()) {
1498 break;
1499 }
1500
1501 // parse sign
1502 int32_t sign = 1;
1503 UChar c = text.charAt(idx);
1504 if (c == PLUS) {
1505 sign = 1;
1506 } else if (c == MINUS) {
1507 sign = -1;
1508 } else {
1509 break;
1510 }
1511 idx++;
1512
1513 // offset part
1514 // try the default pattern with the separator first
1515 int32_t lenWithSep = 0;
1516 int32_t offsetWithSep = parseDefaultOffsetFields(text, idx, DEFAULT_GMT_OFFSET_SEP, lenWithSep);
1517 if (lenWithSep == text.length() - idx) {
1518 // maximum match
1519 offset = offsetWithSep * sign;
1520 idx += lenWithSep;
1521 } else {
1522 // try abutting field pattern
1523 int32_t lenAbut = 0;
1524 int32_t offsetAbut = parseAbuttingOffsetFields(text, idx, lenAbut);
1525
1526 if (lenWithSep > lenAbut) {
1527 offset = offsetWithSep * sign;
1528 idx += lenWithSep;
1529 } else {
1530 offset = offsetAbut * sign;
1531 idx += lenAbut;
1532 }
1533 }
1534 parsed = idx - start;
1535 } while (false);
1536
1537 parsedLen = parsed;
1538 return offset;
1539 }
1540
1541 int32_t
parseDefaultOffsetFields(const UnicodeString & text,int32_t start,UChar separator,int32_t & parsedLen) const1542 TimeZoneFormat::parseDefaultOffsetFields(const UnicodeString& text, int32_t start, UChar separator, int32_t& parsedLen) const {
1543 int32_t max = text.length();
1544 int32_t idx = start;
1545 int32_t len = 0;
1546 int32_t hour = 0, min = 0, sec = 0;
1547
1548 parsedLen = 0;
1549
1550 do {
1551 hour = parseOffsetFieldWithLocalizedDigits(text, idx, 1, 2, 0, MAX_OFFSET_HOUR, len);
1552 if (len == 0) {
1553 break;
1554 }
1555 idx += len;
1556
1557 if (idx + 1 < max && text.charAt(idx) == separator) {
1558 min = parseOffsetFieldWithLocalizedDigits(text, idx + 1, 2, 2, 0, MAX_OFFSET_MINUTE, len);
1559 if (len == 0) {
1560 break;
1561 }
1562 idx += (1 + len);
1563
1564 if (idx + 1 < max && text.charAt(idx) == separator) {
1565 sec = parseOffsetFieldWithLocalizedDigits(text, idx + 1, 2, 2, 0, MAX_OFFSET_SECOND, len);
1566 if (len == 0) {
1567 break;
1568 }
1569 idx += (1 + len);
1570 }
1571 }
1572 } while (FALSE);
1573
1574 if (idx == start) {
1575 return 0;
1576 }
1577
1578 parsedLen = idx - start;
1579 return hour * MILLIS_PER_HOUR + min * MILLIS_PER_MINUTE + sec * MILLIS_PER_SECOND;
1580 }
1581
1582 int32_t
parseOffsetFieldWithLocalizedDigits(const UnicodeString & text,int32_t start,uint8_t minDigits,uint8_t maxDigits,uint16_t minVal,uint16_t maxVal,int32_t & parsedLen) const1583 TimeZoneFormat::parseOffsetFieldWithLocalizedDigits(const UnicodeString& text, int32_t start, uint8_t minDigits, uint8_t maxDigits, uint16_t minVal, uint16_t maxVal, int32_t& parsedLen) const {
1584 parsedLen = 0;
1585
1586 int32_t decVal = 0;
1587 int32_t numDigits = 0;
1588 int32_t idx = start;
1589 int32_t digitLen = 0;
1590
1591 while (idx < text.length() && numDigits < maxDigits) {
1592 int32_t digit = parseSingleLocalizedDigit(text, idx, digitLen);
1593 if (digit < 0) {
1594 break;
1595 }
1596 int32_t tmpVal = decVal * 10 + digit;
1597 if (tmpVal > maxVal) {
1598 break;
1599 }
1600 decVal = tmpVal;
1601 numDigits++;
1602 idx += digitLen;
1603 }
1604
1605 // Note: maxVal is checked in the while loop
1606 if (numDigits < minDigits || decVal < minVal) {
1607 decVal = -1;
1608 numDigits = 0;
1609 } else {
1610 parsedLen = idx - start;
1611 }
1612
1613 return decVal;
1614 }
1615
1616 int32_t
parseSingleLocalizedDigit(const UnicodeString & text,int32_t start,int32_t & len) const1617 TimeZoneFormat::parseSingleLocalizedDigit(const UnicodeString& text, int32_t start, int32_t& len) const {
1618 int32_t digit = -1;
1619 len = 0;
1620 if (start < text.length()) {
1621 UChar32 cp = text.char32At(start);
1622
1623 // First, try digits configured for this instance
1624 for (int32_t i = 0; i < 10; i++) {
1625 if (cp == fGMTOffsetDigits[i]) {
1626 digit = i;
1627 break;
1628 }
1629 }
1630 // If failed, check if this is a Unicode digit
1631 if (digit < 0) {
1632 int32_t tmp = u_charDigitValue(cp);
1633 digit = (tmp >= 0 && tmp <= 9) ? tmp : -1;
1634 }
1635
1636 if (digit >= 0) {
1637 int32_t next = text.moveIndex32(start, 1);
1638 len = next - start;
1639 }
1640 }
1641 return digit;
1642 }
1643
1644 UnicodeString&
formatOffsetWithAsciiDigits(int32_t offset,UChar sep,OffsetFields minFields,OffsetFields maxFields,UnicodeString & result)1645 TimeZoneFormat::formatOffsetWithAsciiDigits(int32_t offset, UChar sep, OffsetFields minFields, OffsetFields maxFields, UnicodeString& result) {
1646 U_ASSERT(maxFields >= minFields);
1647 U_ASSERT(offset > -MAX_OFFSET && offset < MAX_OFFSET);
1648
1649 UChar sign = PLUS;
1650 if (offset < 0) {
1651 sign = MINUS;
1652 offset = -offset;
1653 }
1654 result.setTo(sign);
1655
1656 int fields[3];
1657 fields[0] = offset / MILLIS_PER_HOUR;
1658 offset = offset % MILLIS_PER_HOUR;
1659 fields[1] = offset / MILLIS_PER_MINUTE;
1660 offset = offset % MILLIS_PER_MINUTE;
1661 fields[2] = offset / MILLIS_PER_SECOND;
1662
1663 U_ASSERT(fields[0] >= 0 && fields[0] <= MAX_OFFSET_HOUR);
1664 U_ASSERT(fields[1] >= 0 && fields[1] <= MAX_OFFSET_MINUTE);
1665 U_ASSERT(fields[2] >= 0 && fields[2] <= MAX_OFFSET_SECOND);
1666
1667 int32_t lastIdx = maxFields;
1668 while (lastIdx > minFields) {
1669 if (fields[lastIdx] != 0) {
1670 break;
1671 }
1672 lastIdx--;
1673 }
1674
1675 for (int32_t idx = 0; idx <= lastIdx; idx++) {
1676 if (sep && idx != 0) {
1677 result.append(sep);
1678 }
1679 result.append((UChar)(0x0030 + fields[idx]/10));
1680 result.append((UChar)(0x0030 + fields[idx]%10));
1681 }
1682
1683 return result;
1684 }
1685
1686 int32_t
parseAbuttingAsciiOffsetFields(const UnicodeString & text,ParsePosition & pos,OffsetFields minFields,OffsetFields maxFields,UBool fixedHourWidth)1687 TimeZoneFormat::parseAbuttingAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, OffsetFields minFields, OffsetFields maxFields, UBool fixedHourWidth) {
1688 int32_t start = pos.getIndex();
1689
1690 int32_t minDigits = 2 * (minFields + 1) - (fixedHourWidth ? 0 : 1);
1691 int32_t maxDigits = 2 * (maxFields + 1);
1692
1693 U_ASSERT(maxDigits <= MAX_OFFSET_DIGITS);
1694
1695 int32_t digits[MAX_OFFSET_DIGITS] = {};
1696 int32_t numDigits = 0;
1697 int32_t idx = start;
1698 while (numDigits < maxDigits && idx < text.length()) {
1699 UChar uch = text.charAt(idx);
1700 int32_t digit = DIGIT_VAL(uch);
1701 if (digit < 0) {
1702 break;
1703 }
1704 digits[numDigits] = digit;
1705 numDigits++;
1706 idx++;
1707 }
1708
1709 if (fixedHourWidth && (numDigits & 1)) {
1710 // Fixed digits, so the number of digits must be even number. Truncating.
1711 numDigits--;
1712 }
1713
1714 if (numDigits < minDigits) {
1715 pos.setErrorIndex(start);
1716 return 0;
1717 }
1718
1719 int32_t hour = 0, min = 0, sec = 0;
1720 UBool bParsed = FALSE;
1721 while (numDigits >= minDigits) {
1722 switch (numDigits) {
1723 case 1: //H
1724 hour = digits[0];
1725 break;
1726 case 2: //HH
1727 hour = digits[0] * 10 + digits[1];
1728 break;
1729 case 3: //Hmm
1730 hour = digits[0];
1731 min = digits[1] * 10 + digits[2];
1732 break;
1733 case 4: //HHmm
1734 hour = digits[0] * 10 + digits[1];
1735 min = digits[2] * 10 + digits[3];
1736 break;
1737 case 5: //Hmmss
1738 hour = digits[0];
1739 min = digits[1] * 10 + digits[2];
1740 sec = digits[3] * 10 + digits[4];
1741 break;
1742 case 6: //HHmmss
1743 hour = digits[0] * 10 + digits[1];
1744 min = digits[2] * 10 + digits[3];
1745 sec = digits[4] * 10 + digits[5];
1746 break;
1747 }
1748
1749 if (hour <= MAX_OFFSET_HOUR && min <= MAX_OFFSET_MINUTE && sec <= MAX_OFFSET_SECOND) {
1750 // Successfully parsed
1751 bParsed = true;
1752 break;
1753 }
1754
1755 // Truncating
1756 numDigits -= (fixedHourWidth ? 2 : 1);
1757 hour = min = sec = 0;
1758 }
1759
1760 if (!bParsed) {
1761 pos.setErrorIndex(start);
1762 return 0;
1763 }
1764 pos.setIndex(start + numDigits);
1765 return ((((hour * 60) + min) * 60) + sec) * 1000;
1766 }
1767
1768 int32_t
parseAsciiOffsetFields(const UnicodeString & text,ParsePosition & pos,UChar sep,OffsetFields minFields,OffsetFields maxFields,UBool fixedHourWidth)1769 TimeZoneFormat::parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, UChar sep, OffsetFields minFields, OffsetFields maxFields, UBool fixedHourWidth) {
1770 int32_t start = pos.getIndex();
1771 int32_t fieldVal[] = {0, 0, 0};
1772 int32_t fieldLen[] = {0, -1, -1};
1773 for (int32_t idx = start, fieldIdx = 0; idx < text.length() && fieldIdx <= maxFields; idx++) {
1774 UChar c = text.charAt(idx);
1775 if (c == sep) {
1776 if (fieldLen[fieldIdx] < 0) {
1777 // next field - expected
1778 fieldLen[fieldIdx] = 0;
1779 } else if (fieldIdx == 0 && !fixedHourWidth) {
1780 // 1 digit hour, move to next field
1781 fieldIdx++;
1782 fieldLen[fieldIdx] = 0;
1783 } else {
1784 // otherwise, premature field
1785 break;
1786 }
1787 continue;
1788 }
1789 int32_t digit = DIGIT_VAL(c);
1790 if (digit < 0) {
1791 // not a digit
1792 break;
1793 }
1794 fieldVal[fieldIdx] = fieldVal[fieldIdx] * 10 + digit;
1795 fieldLen[fieldIdx]++;
1796 if (fieldLen[fieldIdx] >= 2) {
1797 // parsed 2 digits, move to next field
1798 fieldIdx++;
1799 }
1800 }
1801
1802 int32_t offset = 0;
1803 int32_t parsedLen = 0;
1804 int32_t parsedFields = -1;
1805 do {
1806 // hour
1807 if (fieldLen[0] == 0 || (fieldLen[0] == 1 && fixedHourWidth)) {
1808 break;
1809 }
1810 if (fieldVal[0] > MAX_OFFSET_HOUR) {
1811 if (fixedHourWidth) {
1812 break;
1813 }
1814 offset = (fieldVal[0] / 10) * MILLIS_PER_HOUR;
1815 parsedFields = FIELDS_H;
1816 parsedLen = 1;
1817 break;
1818 }
1819 offset = fieldVal[0] * MILLIS_PER_HOUR;
1820 parsedLen = fieldLen[0];
1821 parsedFields = FIELDS_H;
1822
1823 // minute
1824 if (fieldLen[1] != 2 || fieldVal[1] > MAX_OFFSET_MINUTE) {
1825 break;
1826 }
1827 offset += fieldVal[1] * MILLIS_PER_MINUTE;
1828 parsedLen += (1 + fieldLen[1]);
1829 parsedFields = FIELDS_HM;
1830
1831 // second
1832 if (fieldLen[2] != 2 || fieldVal[2] > MAX_OFFSET_SECOND) {
1833 break;
1834 }
1835 offset += fieldVal[2] * MILLIS_PER_SECOND;
1836 parsedLen += (1 + fieldLen[2]);
1837 parsedFields = FIELDS_HMS;
1838 } while (false);
1839
1840 if (parsedFields < minFields) {
1841 pos.setErrorIndex(start);
1842 return 0;
1843 }
1844
1845 pos.setIndex(start + parsedLen);
1846 return offset;
1847 }
1848
1849 void
appendOffsetDigits(UnicodeString & buf,int32_t n,uint8_t minDigits) const1850 TimeZoneFormat::appendOffsetDigits(UnicodeString& buf, int32_t n, uint8_t minDigits) const {
1851 U_ASSERT(n >= 0 && n < 60);
1852 int32_t numDigits = n >= 10 ? 2 : 1;
1853 for (int32_t i = 0; i < minDigits - numDigits; i++) {
1854 buf.append(fGMTOffsetDigits[0]);
1855 }
1856 if (numDigits == 2) {
1857 buf.append(fGMTOffsetDigits[n / 10]);
1858 }
1859 buf.append(fGMTOffsetDigits[n % 10]);
1860 }
1861
1862 // ------------------------------------------------------------------
1863 // Private misc
1864 void
initGMTPattern(const UnicodeString & gmtPattern,UErrorCode & status)1865 TimeZoneFormat::initGMTPattern(const UnicodeString& gmtPattern, UErrorCode& status) {
1866 if (U_FAILURE(status)) {
1867 return;
1868 }
1869 // This implementation not perfect, but sufficient practically.
1870 int32_t idx = gmtPattern.indexOf(ARG0, ARG0_LEN, 0);
1871 if (idx < 0) {
1872 status = U_ILLEGAL_ARGUMENT_ERROR;
1873 return;
1874 }
1875 fGMTPattern.setTo(gmtPattern);
1876 unquote(gmtPattern.tempSubString(0, idx), fGMTPatternPrefix);
1877 unquote(gmtPattern.tempSubString(idx + ARG0_LEN), fGMTPatternSuffix);
1878 }
1879
1880 UnicodeString&
unquote(const UnicodeString & pattern,UnicodeString & result)1881 TimeZoneFormat::unquote(const UnicodeString& pattern, UnicodeString& result) {
1882 if (pattern.indexOf(SINGLEQUOTE) < 0) {
1883 result.setTo(pattern);
1884 return result;
1885 }
1886 result.remove();
1887 UBool isPrevQuote = FALSE;
1888 UBool inQuote = FALSE;
1889 for (int32_t i = 0; i < pattern.length(); i++) {
1890 UChar c = pattern.charAt(i);
1891 if (c == SINGLEQUOTE) {
1892 if (isPrevQuote) {
1893 result.append(c);
1894 isPrevQuote = FALSE;
1895 } else {
1896 isPrevQuote = TRUE;
1897 }
1898 inQuote = !inQuote;
1899 } else {
1900 isPrevQuote = FALSE;
1901 result.append(c);
1902 }
1903 }
1904 return result;
1905 }
1906
1907 UVector*
parseOffsetPattern(const UnicodeString & pattern,OffsetFields required,UErrorCode & status)1908 TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields required, UErrorCode& status) {
1909 if (U_FAILURE(status)) {
1910 return NULL;
1911 }
1912 UVector* result = new UVector(deleteGMTOffsetField, NULL, status);
1913 if (result == NULL) {
1914 status = U_MEMORY_ALLOCATION_ERROR;
1915 return NULL;
1916 }
1917
1918 int32_t checkBits = 0;
1919 UBool isPrevQuote = FALSE;
1920 UBool inQuote = FALSE;
1921 UnicodeString text;
1922 GMTOffsetField::FieldType itemType = GMTOffsetField::TEXT;
1923 int32_t itemLength = 1;
1924
1925 for (int32_t i = 0; i < pattern.length(); i++) {
1926 UChar ch = pattern.charAt(i);
1927 if (ch == SINGLEQUOTE) {
1928 if (isPrevQuote) {
1929 text.append(SINGLEQUOTE);
1930 isPrevQuote = FALSE;
1931 } else {
1932 isPrevQuote = TRUE;
1933 if (itemType != GMTOffsetField::TEXT) {
1934 if (GMTOffsetField::isValid(itemType, itemLength)) {
1935 GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, (uint8_t)itemLength, status);
1936 result->addElement(fld, status);
1937 if (U_FAILURE(status)) {
1938 break;
1939 }
1940 } else {
1941 status = U_ILLEGAL_ARGUMENT_ERROR;
1942 break;
1943 }
1944 itemType = GMTOffsetField::TEXT;
1945 }
1946 }
1947 inQuote = !inQuote;
1948 } else {
1949 isPrevQuote = FALSE;
1950 if (inQuote) {
1951 text.append(ch);
1952 } else {
1953 GMTOffsetField::FieldType tmpType = GMTOffsetField::getTypeByLetter(ch);
1954 if (tmpType != GMTOffsetField::TEXT) {
1955 // an offset time pattern character
1956 if (tmpType == itemType) {
1957 itemLength++;
1958 } else {
1959 if (itemType == GMTOffsetField::TEXT) {
1960 if (text.length() > 0) {
1961 GMTOffsetField* textfld = GMTOffsetField::createText(text, status);
1962 result->addElement(textfld, status);
1963 if (U_FAILURE(status)) {
1964 break;
1965 }
1966 text.remove();
1967 }
1968 } else {
1969 if (GMTOffsetField::isValid(itemType, itemLength)) {
1970 GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, itemLength, status);
1971 result->addElement(fld, status);
1972 if (U_FAILURE(status)) {
1973 break;
1974 }
1975 } else {
1976 status = U_ILLEGAL_ARGUMENT_ERROR;
1977 break;
1978 }
1979 }
1980 itemType = tmpType;
1981 itemLength = 1;
1982 checkBits |= tmpType;
1983 }
1984 } else {
1985 // a string literal
1986 if (itemType != GMTOffsetField::TEXT) {
1987 if (GMTOffsetField::isValid(itemType, itemLength)) {
1988 GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, itemLength, status);
1989 result->addElement(fld, status);
1990 if (U_FAILURE(status)) {
1991 break;
1992 }
1993 } else {
1994 status = U_ILLEGAL_ARGUMENT_ERROR;
1995 break;
1996 }
1997 itemType = GMTOffsetField::TEXT;
1998 }
1999 text.append(ch);
2000 }
2001 }
2002 }
2003 }
2004 // handle last item
2005 if (U_SUCCESS(status)) {
2006 if (itemType == GMTOffsetField::TEXT) {
2007 if (text.length() > 0) {
2008 GMTOffsetField* tfld = GMTOffsetField::createText(text, status);
2009 result->addElement(tfld, status);
2010 }
2011 } else {
2012 if (GMTOffsetField::isValid(itemType, itemLength)) {
2013 GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, itemLength, status);
2014 result->addElement(fld, status);
2015 } else {
2016 status = U_ILLEGAL_ARGUMENT_ERROR;
2017 }
2018 }
2019
2020 // Check all required fields are set
2021 if (U_SUCCESS(status)) {
2022 int32_t reqBits = 0;
2023 switch (required) {
2024 case FIELDS_H:
2025 reqBits = GMTOffsetField::HOUR;
2026 break;
2027 case FIELDS_HM:
2028 reqBits = GMTOffsetField::HOUR | GMTOffsetField::MINUTE;
2029 break;
2030 case FIELDS_HMS:
2031 reqBits = GMTOffsetField::HOUR | GMTOffsetField::MINUTE | GMTOffsetField::SECOND;
2032 break;
2033 }
2034 if (checkBits == reqBits) {
2035 // all required fields are set, no extra fields
2036 return result;
2037 }
2038 }
2039 }
2040
2041 // error
2042 delete result;
2043 return NULL;
2044 }
2045
2046 UnicodeString&
expandOffsetPattern(const UnicodeString & offsetHM,UnicodeString & result)2047 TimeZoneFormat::expandOffsetPattern(const UnicodeString& offsetHM, UnicodeString& result) {
2048 U_ASSERT(u_strlen(DEFAULT_GMT_OFFSET_MINUTE_PATTERN) == 2);
2049
2050 int32_t idx_mm = offsetHM.indexOf(DEFAULT_GMT_OFFSET_MINUTE_PATTERN, 2, 0);
2051 if (idx_mm < 0) {
2052 // we cannot do anything with this...
2053 result.setTo(offsetHM);
2054 result.append(DEFAULT_GMT_OFFSET_SEP);
2055 result.append(DEFAULT_GMT_OFFSET_SECOND_PATTERN, -1);
2056 return result;
2057 }
2058
2059 UnicodeString sep;
2060 int32_t idx_H = offsetHM.tempSubString(0, idx_mm).lastIndexOf((UChar)0x0048 /* H */);
2061 if (idx_H >= 0) {
2062 sep = offsetHM.tempSubString(idx_H + 1, idx_mm - (idx_H + 1));
2063 }
2064 result.setTo(offsetHM.tempSubString(0, idx_mm + 2));
2065 result.append(sep);
2066 result.append(DEFAULT_GMT_OFFSET_SECOND_PATTERN, -1);
2067 result.append(offsetHM.tempSubString(idx_mm + 2));
2068 return result;
2069 }
2070
2071 void
initGMTOffsetPatterns(UErrorCode & status)2072 TimeZoneFormat::initGMTOffsetPatterns(UErrorCode& status) {
2073 for (int32_t type = 0; type <= UTZFMT_PAT_NEGATIVE_HMS; type++) {
2074 switch (type) {
2075 case UTZFMT_PAT_POSITIVE_HM:
2076 case UTZFMT_PAT_NEGATIVE_HM:
2077 fGMTOffsetPatternItems[type] = parseOffsetPattern(fGMTOffsetPatterns[type], FIELDS_HM, status);
2078 break;
2079 case UTZFMT_PAT_POSITIVE_HMS:
2080 case UTZFMT_PAT_NEGATIVE_HMS:
2081 fGMTOffsetPatternItems[type] = parseOffsetPattern(fGMTOffsetPatterns[type], FIELDS_HMS, status);
2082 break;
2083 }
2084 }
2085 }
2086
2087 UBool
toCodePoints(const UnicodeString & str,UChar32 * codeArray,int32_t size)2088 TimeZoneFormat::toCodePoints(const UnicodeString& str, UChar32* codeArray, int32_t size) {
2089 int32_t count = str.countChar32();
2090 if (count != size) {
2091 return FALSE;
2092 }
2093
2094 for (int32_t idx = 0, start = 0; idx < size; idx++) {
2095 codeArray[idx] = str.char32At(start);
2096 start = str.moveIndex32(start, 1);
2097 }
2098
2099 return TRUE;
2100 }
2101
2102 TimeZone*
createTimeZoneForOffset(int32_t offset) const2103 TimeZoneFormat::createTimeZoneForOffset(int32_t offset) const {
2104 if (offset == 0) {
2105 // when offset is 0, we should use "Etc/GMT"
2106 return TimeZone::createTimeZone(UnicodeString(TZID_GMT));
2107 }
2108 return ZoneMeta::createCustomTimeZone(offset);
2109 }
2110
2111 UTimeZoneFormatTimeType
getTimeType(UTimeZoneNameType nameType)2112 TimeZoneFormat::getTimeType(UTimeZoneNameType nameType) {
2113 switch (nameType) {
2114 case UTZNM_LONG_STANDARD:
2115 case UTZNM_SHORT_STANDARD:
2116 return UTZFMT_TIME_TYPE_STANDARD;
2117
2118 case UTZNM_LONG_DAYLIGHT:
2119 case UTZNM_SHORT_DAYLIGHT:
2120 return UTZFMT_TIME_TYPE_DAYLIGHT;
2121
2122 default:
2123 U_ASSERT(FALSE);
2124 }
2125 return UTZFMT_TIME_TYPE_UNKNOWN;
2126 }
2127
2128 UnicodeString&
getTimeZoneID(const TimeZoneNames::MatchInfoCollection * matches,int32_t idx,UnicodeString & tzID) const2129 TimeZoneFormat::getTimeZoneID(const TimeZoneNames::MatchInfoCollection* matches, int32_t idx, UnicodeString& tzID) const {
2130 if (!matches->getTimeZoneIDAt(idx, tzID)) {
2131 UnicodeString mzID;
2132 if (matches->getMetaZoneIDAt(idx, mzID)) {
2133 fTimeZoneNames->getReferenceZoneID(mzID, fTargetRegion, tzID);
2134 }
2135 }
2136 return tzID;
2137 }
2138
2139 U_NAMESPACE_END
2140
2141 #endif
2142