• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2011-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_FORMATTING
13 
14 #include "unicode/calendar.h"
15 #include "unicode/tzfmt.h"
16 #include "unicode/numsys.h"
17 #include "unicode/strenum.h"
18 #include "unicode/uchar.h"
19 #include "unicode/udat.h"
20 #include "unicode/ustring.h"
21 #include "unicode/utf16.h"
22 #include "charstr.h"
23 #include "tzgnames.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "putilimp.h"
27 #include "uassert.h"
28 #include "ucln_in.h"
29 #include "ulocimp.h"
30 #include "umutex.h"
31 #include "uresimp.h"
32 #include "ureslocs.h"
33 #include "uvector.h"
34 #include "zonemeta.h"
35 #include "tznames_impl.h"   // TextTrieMap
36 #include "patternprops.h"
37 
38 U_NAMESPACE_BEGIN
39 
// Bit flags used by the parse method to record which styles it has already
// evaluated. STYLE_PARSE_FLAGS is indexed by UTimeZoneFormatStyle, so the
// entry order must match that enum. The ISO styles share two flags.
#define ISO_Z_STYLE_FLAG 0x0080
#define ISO_LOCAL_STYLE_FLAG 0x0100
static const int16_t STYLE_PARSE_FLAGS[] = {
    1 << 0,                 // UTZFMT_STYLE_GENERIC_LOCATION
    1 << 1,                 // UTZFMT_STYLE_GENERIC_LONG
    1 << 2,                 // UTZFMT_STYLE_GENERIC_SHORT
    1 << 3,                 // UTZFMT_STYLE_SPECIFIC_LONG
    1 << 4,                 // UTZFMT_STYLE_SPECIFIC_SHORT
    1 << 5,                 // UTZFMT_STYLE_LOCALIZED_GMT
    1 << 6,                 // UTZFMT_STYLE_LOCALIZED_GMT_SHORT
    ISO_Z_STYLE_FLAG,       // UTZFMT_STYLE_ISO_BASIC_SHORT
    ISO_LOCAL_STYLE_FLAG,   // UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT
    ISO_Z_STYLE_FLAG,       // UTZFMT_STYLE_ISO_BASIC_FIXED
    ISO_LOCAL_STYLE_FLAG,   // UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED
    ISO_Z_STYLE_FLAG,       // UTZFMT_STYLE_ISO_BASIC_FULL
    ISO_LOCAL_STYLE_FLAG,   // UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL
    ISO_Z_STYLE_FLAG,       // UTZFMT_STYLE_ISO_EXTENDED_FIXED
    ISO_LOCAL_STYLE_FLAG,   // UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED
    ISO_Z_STYLE_FLAG,       // UTZFMT_STYLE_ISO_EXTENDED_FULL
    ISO_LOCAL_STYLE_FLAG,   // UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL
    1 << 9,                 // UTZFMT_STYLE_ZONE_ID
    1 << 10,                // UTZFMT_STYLE_ZONE_ID_SHORT
    1 << 11                 // UTZFMT_STYLE_EXEMPLAR_LOCATION
};
66 
// Resource keys read from the locale's zone bundle.
static const char gZoneStringsTag[] = "zoneStrings";
static const char gGmtFormatTag[] = "gmtFormat";
static const char gGmtZeroFormatTag[] = "gmtZeroFormat";
static const char gHourFormatTag[] = "hourFormat";

// Well-known zone identifiers and names (NUL-terminated UTF-16).
static const char16_t TZID_GMT[] = u"Etc/GMT";
static const char16_t UNKNOWN_ZONE_ID[] = u"Etc/Unknown";
static const char16_t UNKNOWN_SHORT_ZONE_ID[] = u"unk";
static const char16_t UNKNOWN_LOCATION[] = u"Unknown";

// Built-in GMT pattern and offset patterns (NUL-terminated UTF-16).
static const char16_t DEFAULT_GMT_PATTERN[] = u"GMT{0}";
//static const char16_t DEFAULT_GMT_ZERO[] = u"GMT";
static const char16_t DEFAULT_GMT_POSITIVE_HM[] = u"+H:mm";
static const char16_t DEFAULT_GMT_POSITIVE_HMS[] = u"+H:mm:ss";
static const char16_t DEFAULT_GMT_NEGATIVE_HM[] = u"-H:mm";
static const char16_t DEFAULT_GMT_NEGATIVE_HMS[] = u"-H:mm:ss";
static const char16_t DEFAULT_GMT_POSITIVE_H[] = u"+H";
static const char16_t DEFAULT_GMT_NEGATIVE_H[] = u"-H";
86 
87 static const UChar32 DEFAULT_GMT_DIGITS[] = {
88     0x0030, 0x0031, 0x0032, 0x0033, 0x0034,
89     0x0035, 0x0036, 0x0037, 0x0038, 0x0039
90 };
91 
static const char16_t DEFAULT_GMT_OFFSET_SEP = u':';

// "{0}" -- deliberately NOT NUL-terminated; always used with ARG0_LEN.
static const char16_t ARG0[] = {u'{', u'0', u'}'};
static const int32_t ARG0_LEN = 3;

static const char16_t DEFAULT_GMT_OFFSET_MINUTE_PATTERN[] = u"mm";
static const char16_t DEFAULT_GMT_OFFSET_SECOND_PATTERN[] = u"ss";

// Alternative GMT strings; the list ends with an empty entry.
static const char16_t ALT_GMT_STRINGS[][4] = {
    u"GMT",
    u"UTC",
    u"UT",
    u""
};
106 
107 // Order of GMT offset pattern parsing, *_HMS must be evaluated first
108 // because *_HM is most likely a substring of *_HMS
109 static const int32_t PARSE_GMT_OFFSET_TYPES[] = {
110     UTZFMT_PAT_POSITIVE_HMS,
111     UTZFMT_PAT_NEGATIVE_HMS,
112     UTZFMT_PAT_POSITIVE_HM,
113     UTZFMT_PAT_NEGATIVE_HM,
114     UTZFMT_PAT_POSITIVE_H,
115     UTZFMT_PAT_NEGATIVE_H,
116     -1
117 };
118 
// Individual UTF-16 characters referenced by the pattern and offset code.
static const char16_t SINGLEQUOTE  = u'\'';
static const char16_t PLUS         = u'+';
static const char16_t MINUS        = u'-';
static const char16_t ISO8601_UTC  = u'Z';
static const char16_t ISO8601_SEP  = u':';
124 
// Millisecond conversion factors.
static const int32_t MILLIS_PER_HOUR = 60 * 60 * 1000;
static const int32_t MILLIS_PER_MINUTE = 60 * 1000;
static const int32_t MILLIS_PER_SECOND = 1000;

// Maximum offset (exclusive) in millisecond supported by offset formats.
// Declared const like its neighbours: it is a fixed limit, not mutable state.
static const int32_t MAX_OFFSET = 24 * MILLIS_PER_HOUR;

// Maximum values for GMT offset fields
static const int32_t MAX_OFFSET_HOUR = 23;
static const int32_t MAX_OFFSET_MINUTE = 59;
static const int32_t MAX_OFFSET_SECOND = 59;

// Sentinel meaning "no offset has been parsed" (INT32_MAX).
static const int32_t UNKNOWN_OFFSET = 0x7FFFFFFF;
138 
139 static const int32_t ALL_SIMPLE_NAME_TYPES = UTZNM_LONG_STANDARD | UTZNM_LONG_DAYLIGHT | UTZNM_SHORT_STANDARD | UTZNM_SHORT_DAYLIGHT | UTZNM_EXEMPLAR_LOCATION;
140 static const int32_t ALL_GENERIC_NAME_TYPES = UTZGNM_LOCATION | UTZGNM_LONG | UTZGNM_SHORT;
141 
// Numeric value (0-9) of the ASCII digit c, or -1 when c is not U+0030..U+0039.
// Note: the argument is evaluated more than once.
#define DIGIT_VAL(c) (((c) >= 0x0030 && (c) <= 0x0039) ? ((c) - 0x0030) : -1)
#define MAX_OFFSET_DIGITS 6
144 
145 // Time Zone ID/Short ID trie
146 static TextTrieMap *gZoneIdTrie = nullptr;
147 static icu::UInitOnce gZoneIdTrieInitOnce {};
148 
149 static TextTrieMap *gShortZoneIdTrie = nullptr;
150 static icu::UInitOnce gShortZoneIdTrieInitOnce {};
151 
152 static UMutex gLock;
153 
154 U_CDECL_BEGIN
155 /**
156  * Cleanup callback func
157  */
tzfmt_cleanup()158 static UBool U_CALLCONV tzfmt_cleanup()
159 {
160     delete gZoneIdTrie;
161     gZoneIdTrie = nullptr;
162     gZoneIdTrieInitOnce.reset();
163 
164     delete gShortZoneIdTrie;
165     gShortZoneIdTrie = nullptr;
166     gShortZoneIdTrieInitOnce.reset();
167 
168     return true;
169 }
170 U_CDECL_END
171 
172 // ------------------------------------------------------------------
173 // GMTOffsetField
174 //
175 // This class represents a localized GMT offset pattern
176 // item and used by TimeZoneFormat
177 // ------------------------------------------------------------------
178 class GMTOffsetField : public UMemory {
179 public:
180     enum FieldType {
181         TEXT = 0,
182         HOUR = 1,
183         MINUTE = 2,
184         SECOND = 4
185     };
186 
187     virtual ~GMTOffsetField();
188 
189     static GMTOffsetField* createText(const UnicodeString& text, UErrorCode& status);
190     static GMTOffsetField* createTimeField(FieldType type, uint8_t width, UErrorCode& status);
191     static UBool isValid(FieldType type, int32_t width);
192     static FieldType getTypeByLetter(char16_t ch);
193 
194     FieldType getType() const;
195     uint8_t getWidth() const;
196     const char16_t* getPatternText() const;
197 
198 private:
199     char16_t* fText;
200     FieldType fType;
201     uint8_t fWidth;
202 
203     GMTOffsetField();
204 };
205 
GMTOffsetField()206 GMTOffsetField::GMTOffsetField()
207 : fText(nullptr), fType(TEXT), fWidth(0) {
208 }
209 
~GMTOffsetField()210 GMTOffsetField::~GMTOffsetField() {
211     if (fText) {
212         uprv_free(fText);
213     }
214 }
215 
216 GMTOffsetField*
createText(const UnicodeString & text,UErrorCode & status)217 GMTOffsetField::createText(const UnicodeString& text, UErrorCode& status) {
218     if (U_FAILURE(status)) {
219         return nullptr;
220     }
221     GMTOffsetField* result = new GMTOffsetField();
222     if (result == nullptr) {
223         status = U_MEMORY_ALLOCATION_ERROR;
224         return nullptr;
225     }
226 
227     int32_t len = text.length();
228     result->fText = (char16_t*)uprv_malloc((len + 1) * sizeof(char16_t));
229     if (result->fText == nullptr) {
230         status = U_MEMORY_ALLOCATION_ERROR;
231         delete result;
232         return nullptr;
233     }
234     u_strncpy(result->fText, text.getBuffer(), len);
235     result->fText[len] = 0;
236     result->fType = TEXT;
237 
238     return result;
239 }
240 
241 GMTOffsetField*
createTimeField(FieldType type,uint8_t width,UErrorCode & status)242 GMTOffsetField::createTimeField(FieldType type, uint8_t width, UErrorCode& status) {
243     U_ASSERT(type != TEXT);
244     if (U_FAILURE(status)) {
245         return nullptr;
246     }
247     GMTOffsetField* result = new GMTOffsetField();
248     if (result == nullptr) {
249         status = U_MEMORY_ALLOCATION_ERROR;
250         return nullptr;
251     }
252 
253     result->fType = type;
254     result->fWidth = width;
255 
256     return result;
257 }
258 
259 UBool
isValid(FieldType type,int32_t width)260 GMTOffsetField::isValid(FieldType type, int32_t width) {
261     switch (type) {
262     case HOUR:
263         return (width == 1 || width == 2);
264     case MINUTE:
265     case SECOND:
266         return (width == 2);
267     default:
268         UPRV_UNREACHABLE_EXIT;
269     }
270     return (width > 0);
271 }
272 
273 GMTOffsetField::FieldType
getTypeByLetter(char16_t ch)274 GMTOffsetField::getTypeByLetter(char16_t ch) {
275     if (ch == 0x0048 /* H */) {
276         return HOUR;
277     } else if (ch == 0x006D /* m */) {
278         return MINUTE;
279     } else if (ch == 0x0073 /* s */) {
280         return SECOND;
281     }
282     return TEXT;
283 }
284 
285 inline GMTOffsetField::FieldType
getType() const286 GMTOffsetField::getType() const {
287      return fType;
288  }
289 
290 inline uint8_t
getWidth() const291 GMTOffsetField::getWidth() const {
292     return fWidth;
293 }
294 
295 inline const char16_t*
getPatternText() const296 GMTOffsetField::getPatternText() const {
297     return fText;
298 }
299 
300 
301 U_CDECL_BEGIN
302 static void U_CALLCONV
deleteGMTOffsetField(void * obj)303 deleteGMTOffsetField(void *obj) {
304     delete static_cast<GMTOffsetField *>(obj);
305 }
306 U_CDECL_END
307 
308 
309 // ------------------------------------------------------------------
310 // TimeZoneFormat
311 // ------------------------------------------------------------------
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TimeZoneFormat)312 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TimeZoneFormat)
313 
314 TimeZoneFormat::TimeZoneFormat(const Locale& locale, UErrorCode& status)
315 : fLocale(locale), fTimeZoneNames(nullptr), fTimeZoneGenericNames(nullptr),
316   fDefParseOptionFlags(0), fTZDBTimeZoneNames(nullptr) {
317 
318     for (int32_t i = 0; i < UTZFMT_PAT_COUNT; i++) {
319         fGMTOffsetPatternItems[i] = nullptr;
320     }
321 
322     const char* region = fLocale.getCountry();
323     int32_t regionLen = static_cast<int32_t>(uprv_strlen(region));
324     if (regionLen == 0) {
325         UErrorCode tempStatus = U_ZERO_ERROR;
326         CharString loc = ulocimp_addLikelySubtags(fLocale.getName(), tempStatus);
327 
328         regionLen = uloc_getCountry(loc.data(), fTargetRegion, sizeof(fTargetRegion), &tempStatus);
329         if (U_SUCCESS(tempStatus)) {
330             fTargetRegion[regionLen] = 0;
331         } else {
332             fTargetRegion[0] = 0;
333         }
334     } else if (regionLen < (int32_t)sizeof(fTargetRegion)) {
335         uprv_strcpy(fTargetRegion, region);
336     } else {
337         fTargetRegion[0] = 0;
338     }
339 
340     fTimeZoneNames = TimeZoneNames::createInstance(locale, status);
341     // fTimeZoneGenericNames is lazily instantiated
342     if (U_FAILURE(status)) {
343         return;
344     }
345 
346     const char16_t* gmtPattern = nullptr;
347     const char16_t* hourFormats = nullptr;
348 
349     UResourceBundle *zoneBundle = ures_open(U_ICUDATA_ZONE, locale.getName(), &status);
350     UResourceBundle *zoneStringsArray = ures_getByKeyWithFallback(zoneBundle, gZoneStringsTag, nullptr, &status);
351     if (U_SUCCESS(status)) {
352         const char16_t* resStr;
353         int32_t len;
354         resStr = ures_getStringByKeyWithFallback(zoneStringsArray, gGmtFormatTag, &len, &status);
355         if (len > 0) {
356             gmtPattern = resStr;
357         }
358         resStr = ures_getStringByKeyWithFallback(zoneStringsArray, gGmtZeroFormatTag, &len, &status);
359         if (len > 0) {
360             fGMTZeroFormat.setTo(true, resStr, len);
361         }
362         resStr = ures_getStringByKeyWithFallback(zoneStringsArray, gHourFormatTag, &len, &status);
363         if (len > 0) {
364             hourFormats = resStr;
365         }
366         ures_close(zoneStringsArray);
367         ures_close(zoneBundle);
368     }
369 
370     if (gmtPattern == nullptr) {
371         gmtPattern = DEFAULT_GMT_PATTERN;
372     }
373     initGMTPattern(UnicodeString(true, gmtPattern, -1), status);
374 
375     UBool useDefaultOffsetPatterns = true;
376     if (hourFormats) {
377         char16_t *sep = u_strchr(hourFormats, (char16_t)0x003B /* ';' */);
378         if (sep != nullptr) {
379             UErrorCode tmpStatus = U_ZERO_ERROR;
380             fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HM].setTo(false, hourFormats, (int32_t)(sep - hourFormats));
381             fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HM].setTo(true, sep + 1, -1);
382             expandOffsetPattern(fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HM], fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HMS], tmpStatus);
383             expandOffsetPattern(fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HM], fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HMS], tmpStatus);
384             truncateOffsetPattern(fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HM], fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_H], tmpStatus);
385             truncateOffsetPattern(fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HM], fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_H], tmpStatus);
386             if (U_SUCCESS(tmpStatus)) {
387                 useDefaultOffsetPatterns = false;
388             }
389         }
390     }
391     if (useDefaultOffsetPatterns) {
392         fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_H].setTo(true, DEFAULT_GMT_POSITIVE_H, -1);
393         fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HM].setTo(true, DEFAULT_GMT_POSITIVE_HM, -1);
394         fGMTOffsetPatterns[UTZFMT_PAT_POSITIVE_HMS].setTo(true, DEFAULT_GMT_POSITIVE_HMS, -1);
395         fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_H].setTo(true, DEFAULT_GMT_NEGATIVE_H, -1);
396         fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HM].setTo(true, DEFAULT_GMT_NEGATIVE_HM, -1);
397         fGMTOffsetPatterns[UTZFMT_PAT_NEGATIVE_HMS].setTo(true, DEFAULT_GMT_NEGATIVE_HMS, -1);
398     }
399     initGMTOffsetPatterns(status);
400 
401     NumberingSystem* ns = NumberingSystem::createInstance(locale, status);
402     UBool useDefDigits = true;
403     if (ns && !ns->isAlgorithmic()) {
404         UnicodeString digits = ns->getDescription();
405         useDefDigits = !toCodePoints(digits, fGMTOffsetDigits, 10);
406     }
407     if (useDefDigits) {
408         uprv_memcpy(fGMTOffsetDigits, DEFAULT_GMT_DIGITS, sizeof(UChar32) * 10);
409     }
410     delete ns;
411 }
412 
/**
 * Copy constructor. Puts every owned pointer into a known (null) state and
 * then delegates the actual copying to the assignment operator.
 */
TimeZoneFormat::TimeZoneFormat(const TimeZoneFormat& other)
: Format(other), fTimeZoneNames(nullptr), fTimeZoneGenericNames(nullptr),
  fTZDBTimeZoneNames(nullptr) {

    // operator= deletes the existing pattern items, so they must be null first.
    for (int32_t idx = 0; idx < UTZFMT_PAT_COUNT; ++idx) {
        fGMTOffsetPatternItems[idx] = nullptr;
    }
    *this = other;
}
422 
423 
~TimeZoneFormat()424 TimeZoneFormat::~TimeZoneFormat() {
425     delete fTimeZoneNames;
426     delete fTimeZoneGenericNames;
427     delete fTZDBTimeZoneNames;
428     for (int32_t i = 0; i < UTZFMT_PAT_COUNT; i++) {
429         delete fGMTOffsetPatternItems[i];
430     }
431 }
432 
433 TimeZoneFormat&
operator =(const TimeZoneFormat & other)434 TimeZoneFormat::operator=(const TimeZoneFormat& other) {
435     if (this == &other) {
436         return *this;
437     }
438 
439     delete fTimeZoneNames;
440     delete fTimeZoneGenericNames;
441     fTimeZoneGenericNames = nullptr;
442     delete fTZDBTimeZoneNames;
443     fTZDBTimeZoneNames = nullptr;
444 
445     fLocale = other.fLocale;
446     uprv_memcpy(fTargetRegion, other.fTargetRegion, sizeof(fTargetRegion));
447 
448     fTimeZoneNames = other.fTimeZoneNames->clone();
449     if (other.fTimeZoneGenericNames) {
450         // TODO: this test has dubious thread safety.
451         fTimeZoneGenericNames = other.fTimeZoneGenericNames->clone();
452     }
453 
454     fGMTPattern = other.fGMTPattern;
455     fGMTPatternPrefix = other.fGMTPatternPrefix;
456     fGMTPatternSuffix = other.fGMTPatternSuffix;
457 
458     UErrorCode status = U_ZERO_ERROR;
459     for (int32_t i = 0; i < UTZFMT_PAT_COUNT; i++) {
460         fGMTOffsetPatterns[i] = other.fGMTOffsetPatterns[i];
461         delete fGMTOffsetPatternItems[i];
462         fGMTOffsetPatternItems[i] = nullptr;
463     }
464     initGMTOffsetPatterns(status);
465     U_ASSERT(U_SUCCESS(status));
466 
467     fGMTZeroFormat = other.fGMTZeroFormat;
468 
469     uprv_memcpy(fGMTOffsetDigits, other.fGMTOffsetDigits, sizeof(fGMTOffsetDigits));
470 
471     fDefParseOptionFlags = other.fDefParseOptionFlags;
472 
473     return *this;
474 }
475 
476 
477 bool
operator ==(const Format & other) const478 TimeZoneFormat::operator==(const Format& other) const {
479     TimeZoneFormat* tzfmt = (TimeZoneFormat*)&other;
480 
481     bool isEqual =
482             fLocale == tzfmt->fLocale
483             && fGMTPattern == tzfmt->fGMTPattern
484             && fGMTZeroFormat == tzfmt->fGMTZeroFormat
485             && *fTimeZoneNames == *tzfmt->fTimeZoneNames;
486 
487     for (int32_t i = 0; i < UTZFMT_PAT_COUNT && isEqual; i++) {
488         isEqual = fGMTOffsetPatterns[i] == tzfmt->fGMTOffsetPatterns[i];
489     }
490     for (int32_t i = 0; i < 10 && isEqual; i++) {
491         isEqual = fGMTOffsetDigits[i] == tzfmt->fGMTOffsetDigits[i];
492     }
493     // TODO
494     // Check fTimeZoneGenericNames. For now,
495     // if fTimeZoneNames is same, fTimeZoneGenericNames should
496     // be also equivalent.
497     return isEqual;
498 }
499 
500 TimeZoneFormat*
clone() const501 TimeZoneFormat::clone() const {
502     return new TimeZoneFormat(*this);
503 }
504 
505 TimeZoneFormat* U_EXPORT2
createInstance(const Locale & locale,UErrorCode & status)506 TimeZoneFormat::createInstance(const Locale& locale, UErrorCode& status) {
507     TimeZoneFormat* tzfmt = new TimeZoneFormat(locale, status);
508     if (U_SUCCESS(status)) {
509         return tzfmt;
510     }
511     delete tzfmt;
512     return nullptr;
513 }
514 
515 // ------------------------------------------------------------------
516 // Setter and Getter
517 
518 const TimeZoneNames*
getTimeZoneNames() const519 TimeZoneFormat::getTimeZoneNames() const {
520     return (const TimeZoneNames*)fTimeZoneNames;
521 }
522 
523 void
adoptTimeZoneNames(TimeZoneNames * tznames)524 TimeZoneFormat::adoptTimeZoneNames(TimeZoneNames *tznames) {
525     delete fTimeZoneNames;
526     fTimeZoneNames = tznames;
527 
528     // TODO - We should also update fTimeZoneGenericNames
529 }
530 
531 void
setTimeZoneNames(const TimeZoneNames & tznames)532 TimeZoneFormat::setTimeZoneNames(const TimeZoneNames &tznames) {
533     delete fTimeZoneNames;
534     fTimeZoneNames = tznames.clone();
535 
536     // TODO - We should also update fTimeZoneGenericNames
537 }
538 
539 void
setDefaultParseOptions(uint32_t flags)540 TimeZoneFormat::setDefaultParseOptions(uint32_t flags) {
541     fDefParseOptionFlags = flags;
542 }
543 
544 uint32_t
getDefaultParseOptions() const545 TimeZoneFormat::getDefaultParseOptions() const {
546     return fDefParseOptionFlags;
547 }
548 
549 
550 UnicodeString&
getGMTPattern(UnicodeString & pattern) const551 TimeZoneFormat::getGMTPattern(UnicodeString& pattern) const {
552     return pattern.setTo(fGMTPattern);
553 }
554 
555 void
setGMTPattern(const UnicodeString & pattern,UErrorCode & status)556 TimeZoneFormat::setGMTPattern(const UnicodeString& pattern, UErrorCode& status) {
557     initGMTPattern(pattern, status);
558 }
559 
560 UnicodeString&
getGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type,UnicodeString & pattern) const561 TimeZoneFormat::getGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type, UnicodeString& pattern) const {
562     return pattern.setTo(fGMTOffsetPatterns[type]);
563 }
564 
565 void
setGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type,const UnicodeString & pattern,UErrorCode & status)566 TimeZoneFormat::setGMTOffsetPattern(UTimeZoneFormatGMTOffsetPatternType type, const UnicodeString& pattern, UErrorCode& status) {
567     if (U_FAILURE(status)) {
568         return;
569     }
570     if (pattern == fGMTOffsetPatterns[type]) {
571         // No need to reset
572         return;
573     }
574 
575     OffsetFields required = FIELDS_HM;
576     switch (type) {
577     case UTZFMT_PAT_POSITIVE_H:
578     case UTZFMT_PAT_NEGATIVE_H:
579         required = FIELDS_H;
580         break;
581     case UTZFMT_PAT_POSITIVE_HM:
582     case UTZFMT_PAT_NEGATIVE_HM:
583         required = FIELDS_HM;
584         break;
585     case UTZFMT_PAT_POSITIVE_HMS:
586     case UTZFMT_PAT_NEGATIVE_HMS:
587         required = FIELDS_HMS;
588         break;
589     default:
590         UPRV_UNREACHABLE_EXIT;
591     }
592 
593     UVector* patternItems = parseOffsetPattern(pattern, required, status);
594     if (patternItems == nullptr) {
595         return;
596     }
597 
598     fGMTOffsetPatterns[type].setTo(pattern);
599     delete fGMTOffsetPatternItems[type];
600     fGMTOffsetPatternItems[type] = patternItems;
601     checkAbuttingHoursAndMinutes();
602 }
603 
604 UnicodeString&
getGMTOffsetDigits(UnicodeString & digits) const605 TimeZoneFormat::getGMTOffsetDigits(UnicodeString& digits) const {
606     digits.remove();
607     for (int32_t i = 0; i < 10; i++) {
608         digits.append(fGMTOffsetDigits[i]);
609     }
610     return digits;
611 }
612 
613 void
setGMTOffsetDigits(const UnicodeString & digits,UErrorCode & status)614 TimeZoneFormat::setGMTOffsetDigits(const UnicodeString& digits, UErrorCode& status) {
615     if (U_FAILURE(status)) {
616         return;
617     }
618     UChar32 digitArray[10];
619     if (!toCodePoints(digits, digitArray, 10)) {
620         status = U_ILLEGAL_ARGUMENT_ERROR;
621         return;
622     }
623     uprv_memcpy(fGMTOffsetDigits, digitArray, sizeof(UChar32)*10);
624 }
625 
626 UnicodeString&
getGMTZeroFormat(UnicodeString & gmtZeroFormat) const627 TimeZoneFormat::getGMTZeroFormat(UnicodeString& gmtZeroFormat) const {
628     return gmtZeroFormat.setTo(fGMTZeroFormat);
629 }
630 
631 void
setGMTZeroFormat(const UnicodeString & gmtZeroFormat,UErrorCode & status)632 TimeZoneFormat::setGMTZeroFormat(const UnicodeString& gmtZeroFormat, UErrorCode& status) {
633     if (U_SUCCESS(status)) {
634         if (gmtZeroFormat.isEmpty()) {
635             status = U_ILLEGAL_ARGUMENT_ERROR;
636         } else if (gmtZeroFormat != fGMTZeroFormat) {
637             fGMTZeroFormat.setTo(gmtZeroFormat);
638         }
639     }
640 }
641 
642 // ------------------------------------------------------------------
643 // Format and Parse
644 
645 UnicodeString&
format(UTimeZoneFormatStyle style,const TimeZone & tz,UDate date,UnicodeString & name,UTimeZoneFormatTimeType * timeType) const646 TimeZoneFormat::format(UTimeZoneFormatStyle style, const TimeZone& tz, UDate date,
647         UnicodeString& name, UTimeZoneFormatTimeType* timeType /* = nullptr */) const {
648     if (timeType) {
649         *timeType = UTZFMT_TIME_TYPE_UNKNOWN;
650     }
651 
652     UBool noOffsetFormatFallback = false;
653 
654     switch (style) {
655     case UTZFMT_STYLE_GENERIC_LOCATION:
656         formatGeneric(tz, UTZGNM_LOCATION, date, name);
657         break;
658     case UTZFMT_STYLE_GENERIC_LONG:
659         formatGeneric(tz, UTZGNM_LONG, date, name);
660         break;
661     case UTZFMT_STYLE_GENERIC_SHORT:
662         formatGeneric(tz, UTZGNM_SHORT, date, name);
663         break;
664     case UTZFMT_STYLE_SPECIFIC_LONG:
665         formatSpecific(tz, UTZNM_LONG_STANDARD, UTZNM_LONG_DAYLIGHT, date, name, timeType);
666         break;
667     case UTZFMT_STYLE_SPECIFIC_SHORT:
668         formatSpecific(tz, UTZNM_SHORT_STANDARD, UTZNM_SHORT_DAYLIGHT, date, name, timeType);
669         break;
670 
671     case UTZFMT_STYLE_ZONE_ID:
672         tz.getID(name);
673         noOffsetFormatFallback = true;
674         break;
675     case UTZFMT_STYLE_ZONE_ID_SHORT:
676         {
677             const char16_t* shortID = ZoneMeta::getShortID(tz);
678             if (shortID == nullptr) {
679                 shortID = UNKNOWN_SHORT_ZONE_ID;
680             }
681             name.setTo(shortID, -1);
682         }
683         noOffsetFormatFallback = true;
684         break;
685 
686     case UTZFMT_STYLE_EXEMPLAR_LOCATION:
687         formatExemplarLocation(tz, name);
688         noOffsetFormatFallback = true;
689         break;
690 
691     default:
692         // will be handled below
693         break;
694     }
695 
696     if (name.isEmpty() && !noOffsetFormatFallback) {
697         UErrorCode status = U_ZERO_ERROR;
698         int32_t rawOffset, dstOffset;
699         tz.getOffset(date, false, rawOffset, dstOffset, status);
700         int32_t offset = rawOffset + dstOffset;
701         if (U_SUCCESS(status)) {
702             switch (style) {
703             case UTZFMT_STYLE_GENERIC_LOCATION:
704             case UTZFMT_STYLE_GENERIC_LONG:
705             case UTZFMT_STYLE_SPECIFIC_LONG:
706             case UTZFMT_STYLE_LOCALIZED_GMT:
707                 formatOffsetLocalizedGMT(offset, name, status);
708                 break;
709 
710             case UTZFMT_STYLE_GENERIC_SHORT:
711             case UTZFMT_STYLE_SPECIFIC_SHORT:
712             case UTZFMT_STYLE_LOCALIZED_GMT_SHORT:
713                 formatOffsetShortLocalizedGMT(offset, name, status);
714                 break;
715 
716             case UTZFMT_STYLE_ISO_BASIC_SHORT:
717                 formatOffsetISO8601Basic(offset, true, true, true, name, status);
718                 break;
719 
720             case UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT:
721                 formatOffsetISO8601Basic(offset, false, true, true, name, status);
722                 break;
723 
724             case UTZFMT_STYLE_ISO_BASIC_FIXED:
725                 formatOffsetISO8601Basic(offset, true, false, true, name, status);
726                 break;
727 
728             case UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED:
729                 formatOffsetISO8601Basic(offset, false, false, true, name, status);
730                 break;
731 
732             case UTZFMT_STYLE_ISO_EXTENDED_FIXED:
733                 formatOffsetISO8601Extended(offset, true, false, true, name, status);
734                 break;
735 
736             case UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED:
737                 formatOffsetISO8601Extended(offset, false, false, true, name, status);
738                 break;
739 
740             case UTZFMT_STYLE_ISO_BASIC_FULL:
741                 formatOffsetISO8601Basic(offset, true, false, false, name, status);
742                 break;
743 
744             case UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL:
745                 formatOffsetISO8601Basic(offset, false, false, false, name, status);
746                 break;
747 
748             case UTZFMT_STYLE_ISO_EXTENDED_FULL:
749                 formatOffsetISO8601Extended(offset, true, false, false, name, status);
750                 break;
751 
752             case UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL:
753                 formatOffsetISO8601Extended(offset, false, false, false, name, status);
754                 break;
755 
756             default:
757               // UTZFMT_STYLE_ZONE_ID, UTZFMT_STYLE_ZONE_ID_SHORT, UTZFMT_STYLE_EXEMPLAR_LOCATION
758               break;
759             }
760 
761             if (timeType) {
762                 *timeType = (dstOffset != 0) ? UTZFMT_TIME_TYPE_DAYLIGHT : UTZFMT_TIME_TYPE_STANDARD;
763             }
764         }
765     }
766 
767     return name;
768 }
769 
770 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const771 TimeZoneFormat::format(const Formattable& obj, UnicodeString& appendTo,
772         FieldPosition& pos, UErrorCode& status) const {
773     if (U_FAILURE(status)) {
774         return appendTo;
775     }
776     UDate date = Calendar::getNow();
777     if (obj.getType() == Formattable::kObject) {
778         const UObject* formatObj = obj.getObject();
779         const TimeZone* tz = dynamic_cast<const TimeZone*>(formatObj);
780         if (tz == nullptr) {
781             const Calendar* cal = dynamic_cast<const Calendar*>(formatObj);
782             if (cal != nullptr) {
783                 tz = &cal->getTimeZone();
784                 date = cal->getTime(status);
785             }
786         }
787         if (tz != nullptr) {
788             int32_t rawOffset, dstOffset;
789             tz->getOffset(date, false, rawOffset, dstOffset, status);
790             char16_t buf[ZONE_NAME_U16_MAX];
791             UnicodeString result(buf, 0, UPRV_LENGTHOF(buf));
792             formatOffsetLocalizedGMT(rawOffset + dstOffset, result, status);
793             if (U_SUCCESS(status)) {
794                 appendTo.append(result);
795                 if (pos.getField() == UDAT_TIMEZONE_FIELD) {
796                     pos.setBeginIndex(0);
797                     pos.setEndIndex(result.length());
798                 }
799             }
800         }
801     }
802     return appendTo;
803 }
804 
805 TimeZone*
parse(UTimeZoneFormatStyle style,const UnicodeString & text,ParsePosition & pos,UTimeZoneFormatTimeType * timeType) const806 TimeZoneFormat::parse(UTimeZoneFormatStyle style, const UnicodeString& text, ParsePosition& pos,
807         UTimeZoneFormatTimeType* timeType /*= nullptr*/) const {
808     return parse(style, text, pos, getDefaultParseOptions(), timeType);
809 }
810 
811 TimeZone*
parse(UTimeZoneFormatStyle style,const UnicodeString & text,ParsePosition & pos,int32_t parseOptions,UTimeZoneFormatTimeType * timeType) const812 TimeZoneFormat::parse(UTimeZoneFormatStyle style, const UnicodeString& text, ParsePosition& pos,
813         int32_t parseOptions, UTimeZoneFormatTimeType* timeType /* = nullptr */) const {
814     if (timeType) {
815         *timeType = UTZFMT_TIME_TYPE_UNKNOWN;
816     }
817 
818     int32_t startIdx = pos.getIndex();
819     int32_t maxPos = text.length();
820     int32_t offset;
821 
822     // Styles using localized GMT format as fallback
823     UBool fallbackLocalizedGMT =
824         (style == UTZFMT_STYLE_SPECIFIC_LONG || style == UTZFMT_STYLE_GENERIC_LONG || style == UTZFMT_STYLE_GENERIC_LOCATION);
825     UBool fallbackShortLocalizedGMT =
826         (style == UTZFMT_STYLE_SPECIFIC_SHORT || style == UTZFMT_STYLE_GENERIC_SHORT);
827 
828     int32_t evaluated = 0;  // bit flags representing already evaluated styles
829     ParsePosition tmpPos(startIdx);
830 
831     int32_t parsedOffset = UNKNOWN_OFFSET;  // stores successfully parsed offset for later use
832     int32_t parsedPos = -1;                 // stores successfully parsed offset position for later use
833 
834     // Try localized GMT format first if necessary
835     if (fallbackLocalizedGMT || fallbackShortLocalizedGMT) {
836         UBool hasDigitOffset = false;
837         offset = parseOffsetLocalizedGMT(text, tmpPos, fallbackShortLocalizedGMT, &hasDigitOffset);
838         if (tmpPos.getErrorIndex() == -1) {
839             // Even when the input text was successfully parsed as a localized GMT format text,
840             // we may still need to evaluate the specified style if -
841             //   1) GMT zero format was used, and
842             //   2) The input text was not completely processed
843             if (tmpPos.getIndex() == maxPos || hasDigitOffset) {
844                 pos.setIndex(tmpPos.getIndex());
845                 return createTimeZoneForOffset(offset);
846             }
847             parsedOffset = offset;
848             parsedPos = tmpPos.getIndex();
849         }
850         // Note: For now, no distinction between long/short localized GMT format in the parser.
851         // This might be changed in future.
852         // evaluated |= (fallbackLocalizedGMT ? STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT] : STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT_SHORT]);
853         evaluated |= STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT] | STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT_SHORT];
854     }
855 
856     UErrorCode status = U_ZERO_ERROR;
857     char16_t tzIDBuf[32];
858     UnicodeString tzID(tzIDBuf, 0, UPRV_LENGTHOF(tzIDBuf));
859 
860     UBool parseTZDBAbbrev = ((parseOptions & UTZFMT_PARSE_OPTION_TZ_DATABASE_ABBREVIATIONS) != 0);
861 
862     // Try the specified style
863     switch (style) {
864     case UTZFMT_STYLE_LOCALIZED_GMT:
865         {
866             tmpPos.setIndex(startIdx);
867             tmpPos.setErrorIndex(-1);
868 
869             offset = parseOffsetLocalizedGMT(text, tmpPos);
870             if (tmpPos.getErrorIndex() == -1) {
871                 pos.setIndex(tmpPos.getIndex());
872                 return createTimeZoneForOffset(offset);
873             }
874 
875             // Note: For now, no distinction between long/short localized GMT format in the parser.
876             // This might be changed in future.
877             evaluated |= STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT_SHORT];
878 
879             break;
880         }
881     case UTZFMT_STYLE_LOCALIZED_GMT_SHORT:
882         {
883             tmpPos.setIndex(startIdx);
884             tmpPos.setErrorIndex(-1);
885 
886             offset = parseOffsetShortLocalizedGMT(text, tmpPos);
887             if (tmpPos.getErrorIndex() == -1) {
888                 pos.setIndex(tmpPos.getIndex());
889                 return createTimeZoneForOffset(offset);
890             }
891 
892             // Note: For now, no distinction between long/short localized GMT format in the parser.
893             // This might be changed in future.
894             evaluated |= STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT];
895 
896             break;
897         }
898     case UTZFMT_STYLE_ISO_BASIC_SHORT:
899     case UTZFMT_STYLE_ISO_BASIC_FIXED:
900     case UTZFMT_STYLE_ISO_BASIC_FULL:
901     case UTZFMT_STYLE_ISO_EXTENDED_FIXED:
902     case UTZFMT_STYLE_ISO_EXTENDED_FULL:
903         {
904             tmpPos.setIndex(startIdx);
905             tmpPos.setErrorIndex(-1);
906 
907             offset = parseOffsetISO8601(text, tmpPos);
908             if (tmpPos.getErrorIndex() == -1) {
909                 pos.setIndex(tmpPos.getIndex());
910                 return createTimeZoneForOffset(offset);
911             }
912 
913             break;
914         }
915 
916     case UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT:
917     case UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED:
918     case UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL:
919     case UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED:
920     case UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL:
921         {
922             tmpPos.setIndex(startIdx);
923             tmpPos.setErrorIndex(-1);
924 
925             // Exclude the case of UTC Indicator "Z" here
926             UBool hasDigitOffset = false;
927             offset = parseOffsetISO8601(text, tmpPos, false, &hasDigitOffset);
928             if (tmpPos.getErrorIndex() == -1 && hasDigitOffset) {
929                 pos.setIndex(tmpPos.getIndex());
930                 return createTimeZoneForOffset(offset);
931             }
932 
933             break;
934         }
935 
936     case UTZFMT_STYLE_SPECIFIC_LONG:
937     case UTZFMT_STYLE_SPECIFIC_SHORT:
938         {
939             // Specific styles
940             int32_t nameTypes = 0;
941             if (style == UTZFMT_STYLE_SPECIFIC_LONG) {
942                 nameTypes = (UTZNM_LONG_STANDARD | UTZNM_LONG_DAYLIGHT);
943             } else {
944                 U_ASSERT(style == UTZFMT_STYLE_SPECIFIC_SHORT);
945                 nameTypes = (UTZNM_SHORT_STANDARD | UTZNM_SHORT_DAYLIGHT);
946             }
947             LocalPointer<TimeZoneNames::MatchInfoCollection> specificMatches(fTimeZoneNames->find(text, startIdx, nameTypes, status));
948             if (U_FAILURE(status)) {
949                 pos.setErrorIndex(startIdx);
950                 return nullptr;
951             }
952             if (!specificMatches.isNull()) {
953                 int32_t matchIdx = -1;
954                 int32_t matchPos = -1;
955                 for (int32_t i = 0; i < specificMatches->size(); i++) {
956                     matchPos  = startIdx + specificMatches->getMatchLengthAt(i);
957                     if (matchPos > parsedPos) {
958                         matchIdx = i;
959                         parsedPos = matchPos;
960                     }
961                 }
962                 if (matchIdx >= 0) {
963                     if (timeType) {
964                         *timeType = getTimeType(specificMatches->getNameTypeAt(matchIdx));
965                     }
966                     pos.setIndex(matchPos);
967                     getTimeZoneID(specificMatches.getAlias(), matchIdx, tzID);
968                     U_ASSERT(!tzID.isEmpty());
969                     return TimeZone::createTimeZone(tzID);
970                 }
971             }
972 
973             if (parseTZDBAbbrev && style == UTZFMT_STYLE_SPECIFIC_SHORT) {
974                 U_ASSERT((nameTypes & UTZNM_SHORT_STANDARD) != 0);
975                 U_ASSERT((nameTypes & UTZNM_SHORT_DAYLIGHT) != 0);
976 
977                 const TZDBTimeZoneNames *tzdbTimeZoneNames = getTZDBTimeZoneNames(status);
978                 if (U_SUCCESS(status)) {
979                     LocalPointer<TimeZoneNames::MatchInfoCollection> tzdbNameMatches(
980                         tzdbTimeZoneNames->find(text, startIdx, nameTypes, status));
981                     if (U_FAILURE(status)) {
982                         pos.setErrorIndex(startIdx);
983                         return nullptr;
984                     }
985                     if (!tzdbNameMatches.isNull()) {
986                         int32_t matchIdx = -1;
987                         int32_t matchPos = -1;
988                         for (int32_t i = 0; i < tzdbNameMatches->size(); i++) {
989                             matchPos = startIdx + tzdbNameMatches->getMatchLengthAt(i);
990                             if (matchPos > parsedPos) {
991                                 matchIdx = i;
992                                 parsedPos = matchPos;
993                             }
994                         }
995                         if (matchIdx >= 0) {
996                             if (timeType) {
997                                 *timeType = getTimeType(tzdbNameMatches->getNameTypeAt(matchIdx));
998                             }
999                             pos.setIndex(matchPos);
1000                             getTimeZoneID(tzdbNameMatches.getAlias(), matchIdx, tzID);
1001                             U_ASSERT(!tzID.isEmpty());
1002                             return TimeZone::createTimeZone(tzID);
1003                         }
1004                     }
1005                 }
1006             }
1007             break;
1008         }
1009     case UTZFMT_STYLE_GENERIC_LONG:
1010     case UTZFMT_STYLE_GENERIC_SHORT:
1011     case UTZFMT_STYLE_GENERIC_LOCATION:
1012         {
1013             int32_t genericNameTypes = 0;
1014             switch (style) {
1015             case UTZFMT_STYLE_GENERIC_LOCATION:
1016                 genericNameTypes = UTZGNM_LOCATION;
1017                 break;
1018 
1019             case UTZFMT_STYLE_GENERIC_LONG:
1020                 genericNameTypes = UTZGNM_LONG | UTZGNM_LOCATION;
1021                 break;
1022 
1023             case UTZFMT_STYLE_GENERIC_SHORT:
1024                 genericNameTypes = UTZGNM_SHORT | UTZGNM_LOCATION;
1025                 break;
1026 
1027             default:
1028                 UPRV_UNREACHABLE_EXIT;
1029             }
1030 
1031             int32_t len = 0;
1032             UTimeZoneFormatTimeType tt = UTZFMT_TIME_TYPE_UNKNOWN;
1033             const TimeZoneGenericNames *gnames = getTimeZoneGenericNames(status);
1034             if (U_SUCCESS(status)) {
1035                 len = gnames->findBestMatch(text, startIdx, genericNameTypes, tzID, tt, status);
1036             }
1037             if (U_FAILURE(status)) {
1038                 pos.setErrorIndex(startIdx);
1039                 return nullptr;
1040             }
1041             if (len > 0) {
1042                 // Found a match
1043                 if (timeType) {
1044                     *timeType = tt;
1045                 }
1046                 pos.setIndex(startIdx + len);
1047                 U_ASSERT(!tzID.isEmpty());
1048                 return TimeZone::createTimeZone(tzID);
1049             }
1050 
1051             break;
1052         }
1053     case UTZFMT_STYLE_ZONE_ID:
1054         {
1055             tmpPos.setIndex(startIdx);
1056             tmpPos.setErrorIndex(-1);
1057 
1058             parseZoneID(text, tmpPos, tzID);
1059             if (tmpPos.getErrorIndex() == -1) {
1060                 pos.setIndex(tmpPos.getIndex());
1061                 return TimeZone::createTimeZone(tzID);
1062             }
1063             break;
1064         }
1065     case UTZFMT_STYLE_ZONE_ID_SHORT:
1066         {
1067             tmpPos.setIndex(startIdx);
1068             tmpPos.setErrorIndex(-1);
1069 
1070             parseShortZoneID(text, tmpPos, tzID);
1071             if (tmpPos.getErrorIndex() == -1) {
1072                 pos.setIndex(tmpPos.getIndex());
1073                 return TimeZone::createTimeZone(tzID);
1074             }
1075             break;
1076         }
1077     case UTZFMT_STYLE_EXEMPLAR_LOCATION:
1078         {
1079             tmpPos.setIndex(startIdx);
1080             tmpPos.setErrorIndex(-1);
1081 
1082             parseExemplarLocation(text, tmpPos, tzID);
1083             if (tmpPos.getErrorIndex() == -1) {
1084                 pos.setIndex(tmpPos.getIndex());
1085                 return TimeZone::createTimeZone(tzID);
1086             }
1087             break;
1088         }
1089     }
1090     evaluated |= STYLE_PARSE_FLAGS[style];
1091 
1092 
1093     if (parsedPos > startIdx) {
1094         // When the specified style is one of SPECIFIC_XXX or GENERIC_XXX, we tried to parse the input
1095         // as localized GMT format earlier. If parsedOffset is positive, it means it was successfully
1096         // parsed as localized GMT format, but offset digits were not detected (more specifically, GMT
1097         // zero format). Then, it tried to find a match within the set of display names, but could not
1098         // find a match. At this point, we can safely assume the input text contains the localized
1099         // GMT format.
1100         U_ASSERT(parsedOffset != UNKNOWN_OFFSET);
1101         pos.setIndex(parsedPos);
1102         return createTimeZoneForOffset(parsedOffset);
1103     }
1104 
1105     // Failed to parse the input text as the time zone format in the specified style.
1106     // Check the longest match among other styles below.
1107     char16_t parsedIDBuf[32];
1108     UnicodeString parsedID(parsedIDBuf, 0, UPRV_LENGTHOF(parsedIDBuf));
1109     UTimeZoneFormatTimeType parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
1110 
1111     U_ASSERT(parsedPos < 0);
1112     U_ASSERT(parsedOffset == UNKNOWN_OFFSET);
1113 
1114     // ISO 8601
1115     if (parsedPos < maxPos &&
1116         ((evaluated & ISO_Z_STYLE_FLAG) == 0 || (evaluated & ISO_LOCAL_STYLE_FLAG) == 0)) {
1117         tmpPos.setIndex(startIdx);
1118         tmpPos.setErrorIndex(-1);
1119 
1120         UBool hasDigitOffset = false;
1121         offset = parseOffsetISO8601(text, tmpPos, false, &hasDigitOffset);
1122         if (tmpPos.getErrorIndex() == -1) {
1123             if (tmpPos.getIndex() == maxPos || hasDigitOffset) {
1124                 pos.setIndex(tmpPos.getIndex());
1125                 return createTimeZoneForOffset(offset);
1126             }
1127             // Note: When ISO 8601 format contains offset digits, it should not
1128             // collide with other formats. However, ISO 8601 UTC format "Z" (single letter)
1129             // may collide with other names. In this case, we need to evaluate other names.
1130             if (parsedPos < tmpPos.getIndex()) {
1131                 parsedOffset = offset;
1132                 parsedID.setToBogus();
1133                 parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
1134                 parsedPos = tmpPos.getIndex();
1135                 U_ASSERT(parsedPos == startIdx + 1);    // only when "Z" is used
1136             }
1137         }
1138     }
1139 
1140     // Localized GMT format
1141     if (parsedPos < maxPos &&
1142         (evaluated & STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT]) == 0) {
1143         tmpPos.setIndex(startIdx);
1144         tmpPos.setErrorIndex(-1);
1145 
1146         UBool hasDigitOffset = false;
1147         offset = parseOffsetLocalizedGMT(text, tmpPos, false, &hasDigitOffset);
1148         if (tmpPos.getErrorIndex() == -1) {
1149             if (tmpPos.getIndex() == maxPos || hasDigitOffset) {
1150                 pos.setIndex(tmpPos.getIndex());
1151                 return createTimeZoneForOffset(offset);
1152             }
1153             // Evaluate other names - see the comment earlier in this method.
1154             if (parsedPos < tmpPos.getIndex()) {
1155                 parsedOffset = offset;
1156                 parsedID.setToBogus();
1157                 parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
1158                 parsedPos = tmpPos.getIndex();
1159             }
1160         }
1161     }
1162 
1163     if (parsedPos < maxPos &&
1164         (evaluated & STYLE_PARSE_FLAGS[UTZFMT_STYLE_LOCALIZED_GMT_SHORT]) == 0) {
1165         tmpPos.setIndex(startIdx);
1166         tmpPos.setErrorIndex(-1);
1167 
1168         UBool hasDigitOffset = false;
1169         offset = parseOffsetLocalizedGMT(text, tmpPos, true, &hasDigitOffset);
1170         if (tmpPos.getErrorIndex() == -1) {
1171             if (tmpPos.getIndex() == maxPos || hasDigitOffset) {
1172                 pos.setIndex(tmpPos.getIndex());
1173                 return createTimeZoneForOffset(offset);
1174             }
1175             // Evaluate other names - see the comment earlier in this method.
1176             if (parsedPos < tmpPos.getIndex()) {
1177                 parsedOffset = offset;
1178                 parsedID.setToBogus();
1179                 parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
1180                 parsedPos = tmpPos.getIndex();
1181             }
1182         }
1183     }
1184 
1185     // When ParseOption.ALL_STYLES is available, we also try to look all possible display names and IDs.
1186     // For example, when style is GENERIC_LONG, "EST" (SPECIFIC_SHORT) is never
1187     // used for America/New_York. With parseAllStyles true, this code parses "EST"
1188     // as America/New_York.
1189 
1190     // Note: Adding all possible names into the trie used by the implementation is quite heavy operation,
1191     // which we want to avoid normally (note that we cache the trie, so this is applicable to the
1192     // first time only as long as the cache does not expire).
1193 
1194     if (parseOptions & UTZFMT_PARSE_OPTION_ALL_STYLES) {
1195         // Try all specific names and exemplar location names
1196         if (parsedPos < maxPos) {
1197             LocalPointer<TimeZoneNames::MatchInfoCollection> specificMatches(fTimeZoneNames->find(text, startIdx, ALL_SIMPLE_NAME_TYPES, status));
1198             if (U_FAILURE(status)) {
1199                 pos.setErrorIndex(startIdx);
1200                 return nullptr;
1201             }
1202             int32_t specificMatchIdx = -1;
1203             int32_t matchPos = -1;
1204             if (!specificMatches.isNull()) {
1205                 for (int32_t i = 0; i < specificMatches->size(); i++) {
1206                     if (startIdx + specificMatches->getMatchLengthAt(i) > matchPos) {
1207                         specificMatchIdx = i;
1208                         matchPos = startIdx + specificMatches->getMatchLengthAt(i);
1209                     }
1210                 }
1211             }
1212             if (parsedPos < matchPos) {
1213                 U_ASSERT(specificMatchIdx >= 0);
1214                 parsedPos = matchPos;
1215                 getTimeZoneID(specificMatches.getAlias(), specificMatchIdx, parsedID);
1216                 parsedTimeType = getTimeType(specificMatches->getNameTypeAt(specificMatchIdx));
1217                 parsedOffset = UNKNOWN_OFFSET;
1218             }
1219         }
1220         if (parseTZDBAbbrev && parsedPos < maxPos && (evaluated & STYLE_PARSE_FLAGS[UTZFMT_STYLE_SPECIFIC_SHORT]) == 0) {
1221             const TZDBTimeZoneNames *tzdbTimeZoneNames = getTZDBTimeZoneNames(status);
1222             if (U_SUCCESS(status)) {
1223                 LocalPointer<TimeZoneNames::MatchInfoCollection> tzdbNameMatches(
1224                     tzdbTimeZoneNames->find(text, startIdx, ALL_SIMPLE_NAME_TYPES, status));
1225                 if (U_FAILURE(status)) {
1226                     pos.setErrorIndex(startIdx);
1227                     return nullptr;
1228                 }
1229                 int32_t tzdbNameMatchIdx = -1;
1230                 int32_t matchPos = -1;
1231                 if (!tzdbNameMatches.isNull()) {
1232                     for (int32_t i = 0; i < tzdbNameMatches->size(); i++) {
1233                         if (startIdx + tzdbNameMatches->getMatchLengthAt(i) > matchPos) {
1234                             tzdbNameMatchIdx = i;
1235                             matchPos = startIdx + tzdbNameMatches->getMatchLengthAt(i);
1236                         }
1237                     }
1238                 }
1239                 if (parsedPos < matchPos) {
1240                     U_ASSERT(tzdbNameMatchIdx >= 0);
1241                     parsedPos = matchPos;
1242                     getTimeZoneID(tzdbNameMatches.getAlias(), tzdbNameMatchIdx, parsedID);
1243                     parsedTimeType = getTimeType(tzdbNameMatches->getNameTypeAt(tzdbNameMatchIdx));
1244                     parsedOffset = UNKNOWN_OFFSET;
1245                 }
1246             }
1247         }
1248         // Try generic names
1249         if (parsedPos < maxPos) {
1250             int32_t genMatchLen = -1;
1251             UTimeZoneFormatTimeType tt = UTZFMT_TIME_TYPE_UNKNOWN;
1252 
1253             const TimeZoneGenericNames *gnames = getTimeZoneGenericNames(status);
1254             if (U_SUCCESS(status)) {
1255                 genMatchLen = gnames->findBestMatch(text, startIdx, ALL_GENERIC_NAME_TYPES, tzID, tt, status);
1256             }
1257             if (U_FAILURE(status)) {
1258                 pos.setErrorIndex(startIdx);
1259                 return nullptr;
1260             }
1261 
1262             if (genMatchLen > 0 && parsedPos < startIdx + genMatchLen) {
1263                 parsedPos = startIdx + genMatchLen;
1264                 parsedID.setTo(tzID);
1265                 parsedTimeType = tt;
1266                 parsedOffset = UNKNOWN_OFFSET;
1267             }
1268         }
1269 
1270         // Try time zone ID
1271         if (parsedPos < maxPos && (evaluated & STYLE_PARSE_FLAGS[UTZFMT_STYLE_ZONE_ID]) == 0) {
1272             tmpPos.setIndex(startIdx);
1273             tmpPos.setErrorIndex(-1);
1274 
1275             parseZoneID(text, tmpPos, tzID);
1276             if (tmpPos.getErrorIndex() == -1 && parsedPos < tmpPos.getIndex()) {
1277                 parsedPos = tmpPos.getIndex();
1278                 parsedID.setTo(tzID);
1279                 parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
1280                 parsedOffset = UNKNOWN_OFFSET;
1281             }
1282         }
1283         // Try short time zone ID
1284         if (parsedPos < maxPos && (evaluated & STYLE_PARSE_FLAGS[UTZFMT_STYLE_ZONE_ID]) == 0) {
1285             tmpPos.setIndex(startIdx);
1286             tmpPos.setErrorIndex(-1);
1287 
1288             parseShortZoneID(text, tmpPos, tzID);
1289             if (tmpPos.getErrorIndex() == -1 && parsedPos < tmpPos.getIndex()) {
1290                 parsedPos = tmpPos.getIndex();
1291                 parsedID.setTo(tzID);
1292                 parsedTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
1293                 parsedOffset = UNKNOWN_OFFSET;
1294             }
1295         }
1296     }
1297 
1298     if (parsedPos > startIdx) {
1299         // Parsed successfully
1300         TimeZone* parsedTZ;
1301         if (parsedID.length() > 0) {
1302             parsedTZ = TimeZone::createTimeZone(parsedID);
1303         } else {
1304             U_ASSERT(parsedOffset != UNKNOWN_OFFSET);
1305             parsedTZ = createTimeZoneForOffset(parsedOffset);
1306         }
1307         if (timeType) {
1308             *timeType = parsedTimeType;
1309         }
1310         pos.setIndex(parsedPos);
1311         return parsedTZ;
1312     }
1313 
1314     pos.setErrorIndex(startIdx);
1315     return nullptr;
1316 }
1317 
1318 void
parseObject(const UnicodeString & source,Formattable & result,ParsePosition & parse_pos) const1319 TimeZoneFormat::parseObject(const UnicodeString& source, Formattable& result,
1320         ParsePosition& parse_pos) const {
1321     result.adoptObject(parse(UTZFMT_STYLE_GENERIC_LOCATION, source, parse_pos, UTZFMT_PARSE_OPTION_ALL_STYLES));
1322 }
1323 
1324 
1325 // ------------------------------------------------------------------
1326 // Private zone name format/parse implementation
1327 
1328 UnicodeString&
formatGeneric(const TimeZone & tz,int32_t genType,UDate date,UnicodeString & name) const1329 TimeZoneFormat::formatGeneric(const TimeZone& tz, int32_t genType, UDate date, UnicodeString& name) const {
1330     UErrorCode status = U_ZERO_ERROR;
1331     const TimeZoneGenericNames* gnames = getTimeZoneGenericNames(status);
1332     if (U_FAILURE(status)) {
1333         name.setToBogus();
1334         return name;
1335     }
1336 
1337     if (genType == UTZGNM_LOCATION) {
1338         const char16_t* canonicalID = ZoneMeta::getCanonicalCLDRID(tz);
1339         if (canonicalID == nullptr) {
1340             name.setToBogus();
1341             return name;
1342         }
1343         return gnames->getGenericLocationName(UnicodeString(true, canonicalID, -1), name);
1344     }
1345     return gnames->getDisplayName(tz, (UTimeZoneGenericNameType)genType, date, name);
1346 }
1347 
1348 UnicodeString&
formatSpecific(const TimeZone & tz,UTimeZoneNameType stdType,UTimeZoneNameType dstType,UDate date,UnicodeString & name,UTimeZoneFormatTimeType * timeType) const1349 TimeZoneFormat::formatSpecific(const TimeZone& tz, UTimeZoneNameType stdType, UTimeZoneNameType dstType,
1350         UDate date, UnicodeString& name, UTimeZoneFormatTimeType *timeType) const {
1351     if (fTimeZoneNames == nullptr) {
1352         name.setToBogus();
1353         return name;
1354     }
1355 
1356     UErrorCode status = U_ZERO_ERROR;
1357     UBool isDaylight = tz.inDaylightTime(date, status);
1358     const char16_t* canonicalID = ZoneMeta::getCanonicalCLDRID(tz);
1359 
1360     if (U_FAILURE(status) || canonicalID == nullptr) {
1361         name.setToBogus();
1362         return name;
1363     }
1364 
1365     if (isDaylight) {
1366         fTimeZoneNames->getDisplayName(UnicodeString(true, canonicalID, -1), dstType, date, name);
1367     } else {
1368         fTimeZoneNames->getDisplayName(UnicodeString(true, canonicalID, -1), stdType, date, name);
1369     }
1370 
1371     if (timeType && !name.isEmpty()) {
1372         *timeType = isDaylight ? UTZFMT_TIME_TYPE_DAYLIGHT : UTZFMT_TIME_TYPE_STANDARD;
1373     }
1374     return name;
1375 }
1376 
1377 const TimeZoneGenericNames*
getTimeZoneGenericNames(UErrorCode & status) const1378 TimeZoneFormat::getTimeZoneGenericNames(UErrorCode& status) const {
1379     if (U_FAILURE(status)) {
1380         return nullptr;
1381     }
1382 
1383     umtx_lock(&gLock);
1384     if (fTimeZoneGenericNames == nullptr) {
1385         TimeZoneFormat *nonConstThis = const_cast<TimeZoneFormat *>(this);
1386         nonConstThis->fTimeZoneGenericNames = TimeZoneGenericNames::createInstance(fLocale, status);
1387     }
1388     umtx_unlock(&gLock);
1389 
1390     return fTimeZoneGenericNames;
1391 }
1392 
1393 const TZDBTimeZoneNames*
getTZDBTimeZoneNames(UErrorCode & status) const1394 TimeZoneFormat::getTZDBTimeZoneNames(UErrorCode& status) const {
1395     if (U_FAILURE(status)) {
1396         return nullptr;
1397     }
1398 
1399     umtx_lock(&gLock);
1400     if (fTZDBTimeZoneNames == nullptr) {
1401         TZDBTimeZoneNames *tzdbNames = new TZDBTimeZoneNames(fLocale);
1402         if (tzdbNames == nullptr) {
1403             status = U_MEMORY_ALLOCATION_ERROR;
1404         } else {
1405             TimeZoneFormat *nonConstThis = const_cast<TimeZoneFormat *>(this);
1406             nonConstThis->fTZDBTimeZoneNames = tzdbNames;
1407         }
1408     }
1409     umtx_unlock(&gLock);
1410 
1411     return fTZDBTimeZoneNames;
1412 }
1413 
1414 UnicodeString&
formatExemplarLocation(const TimeZone & tz,UnicodeString & name) const1415 TimeZoneFormat::formatExemplarLocation(const TimeZone& tz, UnicodeString& name) const {
1416     char16_t locationBuf[ZONE_NAME_U16_MAX];
1417     UnicodeString location(locationBuf, 0, UPRV_LENGTHOF(locationBuf));
1418     const char16_t* canonicalID = ZoneMeta::getCanonicalCLDRID(tz);
1419 
1420     if (canonicalID) {
1421         fTimeZoneNames->getExemplarLocationName(UnicodeString(true, canonicalID, -1), location);
1422     }
1423     if (location.length() > 0) {
1424         name.setTo(location);
1425     } else {
1426         // Use "unknown" location
1427         fTimeZoneNames->getExemplarLocationName(UnicodeString(true, UNKNOWN_ZONE_ID, -1), location);
1428         if (location.length() > 0) {
1429             name.setTo(location);
1430         } else {
1431             // last resort
1432             name.setTo(UNKNOWN_LOCATION, -1);
1433         }
1434     }
1435     return name;
1436 }
1437 
1438 
1439 // ------------------------------------------------------------------
1440 // Zone offset format and parse
1441 
1442 UnicodeString&
formatOffsetISO8601Basic(int32_t offset,UBool useUtcIndicator,UBool isShort,UBool ignoreSeconds,UnicodeString & result,UErrorCode & status) const1443 TimeZoneFormat::formatOffsetISO8601Basic(int32_t offset, UBool useUtcIndicator, UBool isShort, UBool ignoreSeconds,
1444         UnicodeString& result, UErrorCode& status) const {
1445     return formatOffsetISO8601(offset, true, useUtcIndicator, isShort, ignoreSeconds, result, status);
1446 }
1447 
1448 UnicodeString&
formatOffsetISO8601Extended(int32_t offset,UBool useUtcIndicator,UBool isShort,UBool ignoreSeconds,UnicodeString & result,UErrorCode & status) const1449 TimeZoneFormat::formatOffsetISO8601Extended(int32_t offset, UBool useUtcIndicator, UBool isShort, UBool ignoreSeconds,
1450         UnicodeString& result, UErrorCode& status) const {
1451     return formatOffsetISO8601(offset, false, useUtcIndicator, isShort, ignoreSeconds, result, status);
1452 }
1453 
1454 UnicodeString&
formatOffsetLocalizedGMT(int32_t offset,UnicodeString & result,UErrorCode & status) const1455 TimeZoneFormat::formatOffsetLocalizedGMT(int32_t offset, UnicodeString& result, UErrorCode& status) const {
1456     return formatOffsetLocalizedGMT(offset, false, result, status);
1457 }
1458 
1459 UnicodeString&
formatOffsetShortLocalizedGMT(int32_t offset,UnicodeString & result,UErrorCode & status) const1460 TimeZoneFormat::formatOffsetShortLocalizedGMT(int32_t offset, UnicodeString& result, UErrorCode& status) const {
1461     return formatOffsetLocalizedGMT(offset, true, result, status);
1462 }
1463 
1464 int32_t
parseOffsetISO8601(const UnicodeString & text,ParsePosition & pos) const1465 TimeZoneFormat::parseOffsetISO8601(const UnicodeString& text, ParsePosition& pos) const {
1466     return parseOffsetISO8601(text, pos, false);
1467 }
1468 
1469 int32_t
parseOffsetLocalizedGMT(const UnicodeString & text,ParsePosition & pos) const1470 TimeZoneFormat::parseOffsetLocalizedGMT(const UnicodeString& text, ParsePosition& pos) const {
1471     return parseOffsetLocalizedGMT(text, pos, false, nullptr);
1472 }
1473 
1474 int32_t
parseOffsetShortLocalizedGMT(const UnicodeString & text,ParsePosition & pos) const1475 TimeZoneFormat::parseOffsetShortLocalizedGMT(const UnicodeString& text, ParsePosition& pos) const {
1476     return parseOffsetLocalizedGMT(text, pos, true, nullptr);
1477 }
1478 
1479 // ------------------------------------------------------------------
1480 // Private zone offset format/parse implementation
1481 
1482 UnicodeString&
formatOffsetISO8601(int32_t offset,UBool isBasic,UBool useUtcIndicator,UBool isShort,UBool ignoreSeconds,UnicodeString & result,UErrorCode & status) const1483 TimeZoneFormat::formatOffsetISO8601(int32_t offset, UBool isBasic, UBool useUtcIndicator,
1484         UBool isShort, UBool ignoreSeconds, UnicodeString& result, UErrorCode& status) const {
1485     if (U_FAILURE(status)) {
1486         result.setToBogus();
1487         return result;
1488     }
1489     int32_t absOffset = offset < 0 ? -offset : offset;
1490     if (useUtcIndicator && (absOffset < MILLIS_PER_SECOND || (ignoreSeconds && absOffset < MILLIS_PER_MINUTE))) {
1491         result.setTo(ISO8601_UTC);
1492         return result;
1493     }
1494 
1495     OffsetFields minFields = isShort ? FIELDS_H : FIELDS_HM;
1496     OffsetFields maxFields = ignoreSeconds ? FIELDS_HM : FIELDS_HMS;
1497     char16_t sep = isBasic ? 0 : ISO8601_SEP;
1498 
1499     // Note: FIELDS_HMS as maxFields is a CLDR/ICU extension. ISO 8601 specification does
1500     // not support seconds field.
1501 
1502     if (absOffset >= MAX_OFFSET) {
1503         result.setToBogus();
1504         status = U_ILLEGAL_ARGUMENT_ERROR;
1505         return result;
1506     }
1507 
1508     int fields[3];
1509     fields[0] = absOffset / MILLIS_PER_HOUR;
1510     absOffset = absOffset % MILLIS_PER_HOUR;
1511     fields[1] = absOffset / MILLIS_PER_MINUTE;
1512     absOffset = absOffset % MILLIS_PER_MINUTE;
1513     fields[2] = absOffset / MILLIS_PER_SECOND;
1514 
1515     U_ASSERT(fields[0] >= 0 && fields[0] <= MAX_OFFSET_HOUR);
1516     U_ASSERT(fields[1] >= 0 && fields[1] <= MAX_OFFSET_MINUTE);
1517     U_ASSERT(fields[2] >= 0 && fields[2] <= MAX_OFFSET_SECOND);
1518 
1519     int32_t lastIdx = maxFields;
1520     while (lastIdx > minFields) {
1521         if (fields[lastIdx] != 0) {
1522             break;
1523         }
1524         lastIdx--;
1525     }
1526 
1527     char16_t sign = PLUS;
1528     if (offset < 0) {
1529         // if all output fields are 0s, do not use negative sign
1530         for (int32_t idx = 0; idx <= lastIdx; idx++) {
1531             if (fields[idx] != 0) {
1532                 sign = MINUS;
1533                 break;
1534             }
1535         }
1536     }
1537     result.setTo(sign);
1538 
1539     for (int32_t idx = 0; idx <= lastIdx; idx++) {
1540         if (sep && idx != 0) {
1541             result.append(sep);
1542         }
1543         result.append((char16_t)(0x0030 + fields[idx]/10));
1544         result.append((char16_t)(0x0030 + fields[idx]%10));
1545     }
1546 
1547     return result;
1548 }
1549 
1550 UnicodeString&
formatOffsetLocalizedGMT(int32_t offset,UBool isShort,UnicodeString & result,UErrorCode & status) const1551 TimeZoneFormat::formatOffsetLocalizedGMT(int32_t offset, UBool isShort, UnicodeString& result, UErrorCode& status) const {
1552     if (U_FAILURE(status)) {
1553         result.setToBogus();
1554         return result;
1555     }
1556     if (offset <= -MAX_OFFSET || offset >= MAX_OFFSET) {
1557         result.setToBogus();
1558         status = U_ILLEGAL_ARGUMENT_ERROR;
1559         return result;
1560     }
1561 
1562     if (offset == 0) {
1563         result.setTo(fGMTZeroFormat);
1564         return result;
1565     }
1566 
1567     UBool positive = true;
1568     if (offset < 0) {
1569         offset = -offset;
1570         positive = false;
1571     }
1572 
1573     int32_t offsetH = offset / MILLIS_PER_HOUR;
1574     offset = offset % MILLIS_PER_HOUR;
1575     int32_t offsetM = offset / MILLIS_PER_MINUTE;
1576     offset = offset % MILLIS_PER_MINUTE;
1577     int32_t offsetS = offset / MILLIS_PER_SECOND;
1578 
1579     U_ASSERT(offsetH <= MAX_OFFSET_HOUR && offsetM <= MAX_OFFSET_MINUTE && offsetS <= MAX_OFFSET_SECOND);
1580 
1581     const UVector* offsetPatternItems = nullptr;
1582     if (positive) {
1583         if (offsetS != 0) {
1584             offsetPatternItems = fGMTOffsetPatternItems[UTZFMT_PAT_POSITIVE_HMS];
1585         } else if (offsetM != 0 || !isShort) {
1586             offsetPatternItems = fGMTOffsetPatternItems[UTZFMT_PAT_POSITIVE_HM];
1587         } else {
1588             offsetPatternItems = fGMTOffsetPatternItems[UTZFMT_PAT_POSITIVE_H];
1589         }
1590     } else {
1591         if (offsetS != 0) {
1592             offsetPatternItems = fGMTOffsetPatternItems[UTZFMT_PAT_NEGATIVE_HMS];
1593         } else if (offsetM != 0 || !isShort) {
1594             offsetPatternItems = fGMTOffsetPatternItems[UTZFMT_PAT_NEGATIVE_HM];
1595         } else {
1596             offsetPatternItems = fGMTOffsetPatternItems[UTZFMT_PAT_NEGATIVE_H];
1597         }
1598     }
1599 
1600     U_ASSERT(offsetPatternItems != nullptr);
1601 
1602     // Building the GMT format string
1603     result.setTo(fGMTPatternPrefix);
1604 
1605     for (int32_t i = 0; i < offsetPatternItems->size(); i++) {
1606         const GMTOffsetField* item = (GMTOffsetField*)offsetPatternItems->elementAt(i);
1607         GMTOffsetField::FieldType type = item->getType();
1608 
1609         switch (type) {
1610         case GMTOffsetField::TEXT:
1611             result.append(item->getPatternText(), -1);
1612             break;
1613 
1614         case GMTOffsetField::HOUR:
1615             appendOffsetDigits(result, offsetH, (isShort ? 1 : 2));
1616             break;
1617 
1618         case GMTOffsetField::MINUTE:
1619             appendOffsetDigits(result, offsetM, 2);
1620             break;
1621 
1622         case GMTOffsetField::SECOND:
1623             appendOffsetDigits(result, offsetS, 2);
1624             break;
1625         }
1626     }
1627 
1628     result.append(fGMTPatternSuffix);
1629     return result;
1630 }
1631 
1632 int32_t
parseOffsetISO8601(const UnicodeString & text,ParsePosition & pos,UBool extendedOnly,UBool * hasDigitOffset) const1633 TimeZoneFormat::parseOffsetISO8601(const UnicodeString& text, ParsePosition& pos, UBool extendedOnly, UBool* hasDigitOffset /* = nullptr */) const {
1634     if (hasDigitOffset) {
1635         *hasDigitOffset = false;
1636     }
1637     int32_t start = pos.getIndex();
1638     if (start >= text.length()) {
1639         pos.setErrorIndex(start);
1640         return 0;
1641     }
1642 
1643     char16_t firstChar = text.charAt(start);
1644     if (firstChar == ISO8601_UTC || firstChar == (char16_t)(ISO8601_UTC + 0x20)) {
1645         // "Z" (or "z") - indicates UTC
1646         pos.setIndex(start + 1);
1647         return 0;
1648     }
1649 
1650     int32_t sign = 1;
1651     if (firstChar == PLUS) {
1652         sign = 1;
1653     } else if (firstChar == MINUS) {
1654         sign = -1;
1655     } else {
1656         // Not an ISO 8601 offset string
1657         pos.setErrorIndex(start);
1658         return 0;
1659     }
1660     ParsePosition posOffset(start + 1);
1661     int32_t offset = parseAsciiOffsetFields(text, posOffset, ISO8601_SEP, FIELDS_H, FIELDS_HMS);
1662     if (posOffset.getErrorIndex() == -1 && !extendedOnly && (posOffset.getIndex() - start <= 3)) {
1663         // If the text is successfully parsed as extended format with the options above, it can be also parsed
1664         // as basic format. For example, "0230" can be parsed as offset 2:00 (only first digits are valid for
1665         // extended format), but it can be parsed as offset 2:30 with basic format. We use longer result.
1666         ParsePosition posBasic(start + 1);
1667         int32_t tmpOffset = parseAbuttingAsciiOffsetFields(text, posBasic, FIELDS_H, FIELDS_HMS, false);
1668         if (posBasic.getErrorIndex() == -1 && posBasic.getIndex() > posOffset.getIndex()) {
1669             offset = tmpOffset;
1670             posOffset.setIndex(posBasic.getIndex());
1671         }
1672     }
1673 
1674     if (posOffset.getErrorIndex() != -1) {
1675         pos.setErrorIndex(start);
1676         return 0;
1677     }
1678 
1679     pos.setIndex(posOffset.getIndex());
1680     if (hasDigitOffset) {
1681         *hasDigitOffset = true;
1682     }
1683     return sign * offset;
1684 }
1685 
1686 int32_t
parseOffsetLocalizedGMT(const UnicodeString & text,ParsePosition & pos,UBool isShort,UBool * hasDigitOffset) const1687 TimeZoneFormat::parseOffsetLocalizedGMT(const UnicodeString& text, ParsePosition& pos, UBool isShort, UBool* hasDigitOffset) const {
1688     int32_t start = pos.getIndex();
1689     int32_t offset = 0;
1690     int32_t parsedLength = 0;
1691 
1692     if (hasDigitOffset) {
1693         *hasDigitOffset = false;
1694     }
1695 
1696     offset = parseOffsetLocalizedGMTPattern(text, start, isShort, parsedLength);
1697 
1698     // For now, parseOffsetLocalizedGMTPattern handles both long and short
1699     // formats, no matter isShort is true or false. This might be changed in future
1700     // when strict parsing is necessary, or different set of patterns are used for
1701     // short/long formats.
1702 #if 0
1703     if (parsedLength == 0) {
1704         offset = parseOffsetLocalizedGMTPattern(text, start, !isShort, parsedLength);
1705     }
1706 #endif
1707 
1708     if (parsedLength > 0) {
1709         if (hasDigitOffset) {
1710             *hasDigitOffset = true;
1711         }
1712         pos.setIndex(start + parsedLength);
1713         return offset;
1714     }
1715 
1716     // Try the default patterns
1717     offset = parseOffsetDefaultLocalizedGMT(text, start, parsedLength);
1718     if (parsedLength > 0) {
1719         if (hasDigitOffset) {
1720             *hasDigitOffset = true;
1721         }
1722         pos.setIndex(start + parsedLength);
1723         return offset;
1724     }
1725 
1726     // Check if this is a GMT zero format
1727     if (text.caseCompare(start, fGMTZeroFormat.length(), fGMTZeroFormat, 0) == 0) {
1728         pos.setIndex(start + fGMTZeroFormat.length());
1729         return 0;
1730     }
1731 
1732     // Check if this is a default GMT zero format
1733     for (int32_t i = 0; ALT_GMT_STRINGS[i][0] != 0; i++) {
1734         const char16_t* defGMTZero = ALT_GMT_STRINGS[i];
1735         int32_t defGMTZeroLen = u_strlen(defGMTZero);
1736         if (text.caseCompare(start, defGMTZeroLen, defGMTZero, 0) == 0) {
1737             pos.setIndex(start + defGMTZeroLen);
1738             return 0;
1739         }
1740     }
1741 
1742     // Nothing matched
1743     pos.setErrorIndex(start);
1744     return 0;
1745 }
1746 
1747 int32_t
parseOffsetLocalizedGMTPattern(const UnicodeString & text,int32_t start,UBool,int32_t & parsedLen) const1748 TimeZoneFormat::parseOffsetLocalizedGMTPattern(const UnicodeString& text, int32_t start, UBool /*isShort*/, int32_t& parsedLen) const {
1749     int32_t idx = start;
1750     int32_t offset = 0;
1751     UBool parsed = false;
1752 
1753     do {
1754         // Prefix part
1755         int32_t len = fGMTPatternPrefix.length();
1756         if (len > 0 && text.caseCompare(idx, len, fGMTPatternPrefix, 0) != 0) {
1757             // prefix match failed
1758             break;
1759         }
1760         idx += len;
1761 
1762         // Offset part
1763         offset = parseOffsetFields(text, idx, false, len);
1764         if (len == 0) {
1765             // offset field match failed
1766             break;
1767         }
1768         idx += len;
1769 
1770         len = fGMTPatternSuffix.length();
1771         if (len > 0 && text.caseCompare(idx, len, fGMTPatternSuffix, 0) != 0) {
1772             // no suffix match
1773             break;
1774         }
1775         idx += len;
1776         parsed = true;
1777     } while (false);
1778 
1779     parsedLen = parsed ? idx - start : 0;
1780     return offset;
1781 }
1782 
1783 int32_t
parseOffsetFields(const UnicodeString & text,int32_t start,UBool,int32_t & parsedLen) const1784 TimeZoneFormat::parseOffsetFields(const UnicodeString& text, int32_t start, UBool /*isShort*/, int32_t& parsedLen) const {
1785     int32_t outLen = 0;
1786     int32_t offset = 0;
1787     int32_t sign = 1;
1788 
1789     parsedLen = 0;
1790 
1791     int32_t offsetH, offsetM, offsetS;
1792     offsetH = offsetM = offsetS = 0;
1793 
1794     for (int32_t patidx = 0; PARSE_GMT_OFFSET_TYPES[patidx] >= 0; patidx++) {
1795         int32_t gmtPatType = PARSE_GMT_OFFSET_TYPES[patidx];
1796         UVector* items = fGMTOffsetPatternItems[gmtPatType];
1797         U_ASSERT(items != nullptr);
1798 
1799         outLen = parseOffsetFieldsWithPattern(text, start, items, false, offsetH, offsetM, offsetS);
1800         if (outLen > 0) {
1801             sign = (gmtPatType == UTZFMT_PAT_POSITIVE_H || gmtPatType == UTZFMT_PAT_POSITIVE_HM || gmtPatType == UTZFMT_PAT_POSITIVE_HMS) ?
1802                 1 : -1;
1803             break;
1804         }
1805     }
1806 
1807     if (outLen > 0 && fAbuttingOffsetHoursAndMinutes) {
1808         // When hours field is sabutting minutes field,
1809         // the parse result above may not be appropriate.
1810         // For example, "01020" is parsed as 01:02: above,
1811         // but it should be parsed as 00:10:20.
1812         int32_t tmpLen = 0;
1813         int32_t tmpSign = 1;
1814         int32_t tmpH = 0;
1815         int32_t tmpM = 0;
1816         int32_t tmpS = 0;
1817 
1818         for (int32_t patidx = 0; PARSE_GMT_OFFSET_TYPES[patidx] >= 0; patidx++) {
1819             int32_t gmtPatType = PARSE_GMT_OFFSET_TYPES[patidx];
1820             UVector* items = fGMTOffsetPatternItems[gmtPatType];
1821             U_ASSERT(items != nullptr);
1822 
1823             // forcing parse to use single hour digit
1824             tmpLen = parseOffsetFieldsWithPattern(text, start, items, true, tmpH, tmpM, tmpS);
1825             if (tmpLen > 0) {
1826                 tmpSign = (gmtPatType == UTZFMT_PAT_POSITIVE_H || gmtPatType == UTZFMT_PAT_POSITIVE_HM || gmtPatType == UTZFMT_PAT_POSITIVE_HMS) ?
1827                     1 : -1;
1828                 break;
1829             }
1830         }
1831         if (tmpLen > outLen) {
1832             // Better parse result with single hour digit
1833             outLen = tmpLen;
1834             sign = tmpSign;
1835             offsetH = tmpH;
1836             offsetM = tmpM;
1837             offsetS = tmpS;
1838         }
1839     }
1840 
1841     if (outLen > 0) {
1842         offset = ((((offsetH * 60) + offsetM) * 60) + offsetS) * 1000 * sign;
1843         parsedLen = outLen;
1844     }
1845 
1846     return offset;
1847 }
1848 
1849 int32_t
parseOffsetFieldsWithPattern(const UnicodeString & text,int32_t start,UVector * patternItems,UBool forceSingleHourDigit,int32_t & hour,int32_t & min,int32_t & sec) const1850 TimeZoneFormat::parseOffsetFieldsWithPattern(const UnicodeString& text, int32_t start,
1851         UVector* patternItems, UBool forceSingleHourDigit, int32_t& hour, int32_t& min, int32_t& sec) const {
1852     UBool failed = false;
1853     int32_t offsetH, offsetM, offsetS;
1854     offsetH = offsetM = offsetS = 0;
1855     int32_t idx = start;
1856 
1857     for (int32_t i = 0; i < patternItems->size(); i++) {
1858         int32_t len = 0;
1859         const GMTOffsetField* field = (const GMTOffsetField*)patternItems->elementAt(i);
1860         GMTOffsetField::FieldType fieldType = field->getType();
1861         if (fieldType == GMTOffsetField::TEXT) {
1862             const char16_t* patStr = field->getPatternText();
1863             len = u_strlen(patStr);
1864             if (i == 0) {
1865                 // When TimeZoneFormat parse() is called from SimpleDateFormat,
1866                 // leading space characters might be truncated. If the first pattern text
1867                 // starts with such character (e.g. Bidi control), then we need to
1868                 // skip the leading space characters.
1869                 if (idx < text.length() && !PatternProps::isWhiteSpace(text.char32At(idx))) {
1870                     while (len > 0) {
1871                         UChar32 ch;
1872                         int32_t chLen;
1873                         U16_GET(patStr, 0, 0, len, ch);
1874                         if (PatternProps::isWhiteSpace(ch)) {
1875                             chLen = U16_LENGTH(ch);
1876                             len -= chLen;
1877                             patStr += chLen;
1878                         }
1879                         else {
1880                             break;
1881                         }
1882                     }
1883                 }
1884             }
1885             if (text.caseCompare(idx, len, patStr, 0) != 0) {
1886                 failed = true;
1887                 break;
1888             }
1889             idx += len;
1890         } else {
1891             if (fieldType == GMTOffsetField::HOUR) {
1892                 uint8_t maxDigits = forceSingleHourDigit ? 1 : 2;
1893                 offsetH = parseOffsetFieldWithLocalizedDigits(text, idx, 1, maxDigits, 0, MAX_OFFSET_HOUR, len);
1894             } else if (fieldType == GMTOffsetField::MINUTE) {
1895                 offsetM = parseOffsetFieldWithLocalizedDigits(text, idx, 2, 2, 0, MAX_OFFSET_MINUTE, len);
1896             } else if (fieldType == GMTOffsetField::SECOND) {
1897                 offsetS = parseOffsetFieldWithLocalizedDigits(text, idx, 2, 2, 0, MAX_OFFSET_SECOND, len);
1898             }
1899 
1900             if (len == 0) {
1901                 failed = true;
1902                 break;
1903             }
1904             idx += len;
1905         }
1906     }
1907 
1908     if (failed) {
1909         hour = min = sec = 0;
1910         return 0;
1911     }
1912 
1913     hour = offsetH;
1914     min = offsetM;
1915     sec = offsetS;
1916 
1917     return idx - start;
1918 }
1919 
1920 int32_t
parseAbuttingOffsetFields(const UnicodeString & text,int32_t start,int32_t & parsedLen) const1921 TimeZoneFormat::parseAbuttingOffsetFields(const UnicodeString& text, int32_t start, int32_t& parsedLen) const {
1922     int32_t digits[MAX_OFFSET_DIGITS];
1923     int32_t parsed[MAX_OFFSET_DIGITS];  // accumulative offsets
1924 
1925     // Parse digits into int[]
1926     int32_t idx = start;
1927     int32_t len = 0;
1928     int32_t numDigits = 0;
1929     for (int32_t i = 0; i < MAX_OFFSET_DIGITS; i++) {
1930         digits[i] = parseSingleLocalizedDigit(text, idx, len);
1931         if (digits[i] < 0) {
1932             break;
1933         }
1934         idx += len;
1935         parsed[i] = idx - start;
1936         numDigits++;
1937     }
1938 
1939     if (numDigits == 0) {
1940         parsedLen = 0;
1941         return 0;
1942     }
1943 
1944     int32_t offset = 0;
1945     while (numDigits > 0) {
1946         int32_t hour = 0;
1947         int32_t min = 0;
1948         int32_t sec = 0;
1949 
1950         U_ASSERT(numDigits > 0 && numDigits <= MAX_OFFSET_DIGITS);
1951         switch (numDigits) {
1952         case 1: // H
1953             hour = digits[0];
1954             break;
1955         case 2: // HH
1956             hour = digits[0] * 10 + digits[1];
1957             break;
1958         case 3: // Hmm
1959             hour = digits[0];
1960             min = digits[1] * 10 + digits[2];
1961             break;
1962         case 4: // HHmm
1963             hour = digits[0] * 10 + digits[1];
1964             min = digits[2] * 10 + digits[3];
1965             break;
1966         case 5: // Hmmss
1967             hour = digits[0];
1968             min = digits[1] * 10 + digits[2];
1969             sec = digits[3] * 10 + digits[4];
1970             break;
1971         case 6: // HHmmss
1972             hour = digits[0] * 10 + digits[1];
1973             min = digits[2] * 10 + digits[3];
1974             sec = digits[4] * 10 + digits[5];
1975             break;
1976         }
1977         if (hour <= MAX_OFFSET_HOUR && min <= MAX_OFFSET_MINUTE && sec <= MAX_OFFSET_SECOND) {
1978             // found a valid combination
1979             offset = hour * MILLIS_PER_HOUR + min * MILLIS_PER_MINUTE + sec * MILLIS_PER_SECOND;
1980             parsedLen = parsed[numDigits - 1];
1981             break;
1982         }
1983         numDigits--;
1984     }
1985     return offset;
1986 }
1987 
1988 int32_t
parseOffsetDefaultLocalizedGMT(const UnicodeString & text,int start,int32_t & parsedLen) const1989 TimeZoneFormat::parseOffsetDefaultLocalizedGMT(const UnicodeString& text, int start, int32_t& parsedLen) const {
1990     int32_t idx = start;
1991     int32_t offset = 0;
1992     int32_t parsed = 0;
1993 
1994     do {
1995         // check global default GMT alternatives
1996         int32_t gmtLen = 0;
1997 
1998         for (int32_t i = 0; ALT_GMT_STRINGS[i][0] != 0; i++) {
1999             const char16_t* gmt = ALT_GMT_STRINGS[i];
2000             int32_t len = u_strlen(gmt);
2001             if (text.caseCompare(start, len, gmt, 0) == 0) {
2002                 gmtLen = len;
2003                 break;
2004             }
2005         }
2006         if (gmtLen == 0) {
2007             break;
2008         }
2009         idx += gmtLen;
2010 
2011         // offset needs a sign char and a digit at minimum
2012         if (idx + 1 >= text.length()) {
2013             break;
2014         }
2015 
2016         // parse sign
2017         int32_t sign = 1;
2018         char16_t c = text.charAt(idx);
2019         if (c == PLUS) {
2020             sign = 1;
2021         } else if (c == MINUS) {
2022             sign = -1;
2023         } else {
2024             break;
2025         }
2026         idx++;
2027 
2028         // offset part
2029         // try the default pattern with the separator first
2030         int32_t lenWithSep = 0;
2031         int32_t offsetWithSep = parseDefaultOffsetFields(text, idx, DEFAULT_GMT_OFFSET_SEP, lenWithSep);
2032         if (lenWithSep == text.length() - idx) {
2033             // maximum match
2034             offset = offsetWithSep * sign;
2035             idx += lenWithSep;
2036         } else {
2037             // try abutting field pattern
2038             int32_t lenAbut = 0;
2039             int32_t offsetAbut = parseAbuttingOffsetFields(text, idx, lenAbut);
2040 
2041             if (lenWithSep > lenAbut) {
2042                 offset = offsetWithSep * sign;
2043                 idx += lenWithSep;
2044             } else {
2045                 offset = offsetAbut * sign;
2046                 idx += lenAbut;
2047             }
2048         }
2049         parsed = idx - start;
2050     } while (false);
2051 
2052     parsedLen = parsed;
2053     return offset;
2054 }
2055 
2056 int32_t
parseDefaultOffsetFields(const UnicodeString & text,int32_t start,char16_t separator,int32_t & parsedLen) const2057 TimeZoneFormat::parseDefaultOffsetFields(const UnicodeString& text, int32_t start, char16_t separator, int32_t& parsedLen) const {
2058     int32_t max = text.length();
2059     int32_t idx = start;
2060     int32_t len = 0;
2061     int32_t hour = 0, min = 0, sec = 0;
2062 
2063     parsedLen = 0;
2064 
2065     do {
2066         hour = parseOffsetFieldWithLocalizedDigits(text, idx, 1, 2, 0, MAX_OFFSET_HOUR, len);
2067         if (len == 0) {
2068             break;
2069         }
2070         idx += len;
2071 
2072         if (idx + 1 < max && text.charAt(idx) == separator) {
2073             min = parseOffsetFieldWithLocalizedDigits(text, idx + 1, 2, 2, 0, MAX_OFFSET_MINUTE, len);
2074             if (len == 0) {
2075                 break;
2076             }
2077             idx += (1 + len);
2078 
2079             if (idx + 1 < max && text.charAt(idx) == separator) {
2080                 sec = parseOffsetFieldWithLocalizedDigits(text, idx + 1, 2, 2, 0, MAX_OFFSET_SECOND, len);
2081                 if (len == 0) {
2082                     break;
2083                 }
2084                 idx += (1 + len);
2085             }
2086         }
2087     } while (false);
2088 
2089     if (idx == start) {
2090         return 0;
2091     }
2092 
2093     parsedLen = idx - start;
2094     return hour * MILLIS_PER_HOUR + min * MILLIS_PER_MINUTE + sec * MILLIS_PER_SECOND;
2095 }
2096 
2097 int32_t
parseOffsetFieldWithLocalizedDigits(const UnicodeString & text,int32_t start,uint8_t minDigits,uint8_t maxDigits,uint16_t minVal,uint16_t maxVal,int32_t & parsedLen) const2098 TimeZoneFormat::parseOffsetFieldWithLocalizedDigits(const UnicodeString& text, int32_t start, uint8_t minDigits, uint8_t maxDigits, uint16_t minVal, uint16_t maxVal, int32_t& parsedLen) const {
2099     parsedLen = 0;
2100 
2101     int32_t decVal = 0;
2102     int32_t numDigits = 0;
2103     int32_t idx = start;
2104     int32_t digitLen = 0;
2105 
2106     while (idx < text.length() && numDigits < maxDigits) {
2107         int32_t digit = parseSingleLocalizedDigit(text, idx, digitLen);
2108         if (digit < 0) {
2109             break;
2110         }
2111         int32_t tmpVal = decVal * 10 + digit;
2112         if (tmpVal > maxVal) {
2113             break;
2114         }
2115         decVal = tmpVal;
2116         numDigits++;
2117         idx += digitLen;
2118     }
2119 
2120     // Note: maxVal is checked in the while loop
2121     if (numDigits < minDigits || decVal < minVal) {
2122         decVal = -1;
2123         numDigits = 0;
2124     } else {
2125         parsedLen = idx - start;
2126     }
2127 
2128     return decVal;
2129 }
2130 
2131 int32_t
parseSingleLocalizedDigit(const UnicodeString & text,int32_t start,int32_t & len) const2132 TimeZoneFormat::parseSingleLocalizedDigit(const UnicodeString& text, int32_t start, int32_t& len) const {
2133     int32_t digit = -1;
2134     len = 0;
2135     if (start < text.length()) {
2136         UChar32 cp = text.char32At(start);
2137 
2138         // First, try digits configured for this instance
2139         for (int32_t i = 0; i < 10; i++) {
2140             if (cp == fGMTOffsetDigits[i]) {
2141                 digit = i;
2142                 break;
2143             }
2144         }
2145         // If failed, check if this is a Unicode digit
2146         if (digit < 0) {
2147             int32_t tmp = u_charDigitValue(cp);
2148             digit = (tmp >= 0 && tmp <= 9) ? tmp : -1;
2149         }
2150 
2151         if (digit >= 0) {
2152             int32_t next = text.moveIndex32(start, 1);
2153             len = next - start;
2154         }
2155     }
2156     return digit;
2157 }
2158 
2159 UnicodeString&
formatOffsetWithAsciiDigits(int32_t offset,char16_t sep,OffsetFields minFields,OffsetFields maxFields,UnicodeString & result)2160 TimeZoneFormat::formatOffsetWithAsciiDigits(int32_t offset, char16_t sep, OffsetFields minFields, OffsetFields maxFields, UnicodeString& result) {
2161     U_ASSERT(maxFields >= minFields);
2162     U_ASSERT(offset > -MAX_OFFSET && offset < MAX_OFFSET);
2163 
2164     char16_t sign = PLUS;
2165     if (offset < 0) {
2166         sign = MINUS;
2167         offset = -offset;
2168     }
2169     result.setTo(sign);
2170 
2171     int fields[3];
2172     fields[0] = offset / MILLIS_PER_HOUR;
2173     offset = offset % MILLIS_PER_HOUR;
2174     fields[1] = offset / MILLIS_PER_MINUTE;
2175     offset = offset % MILLIS_PER_MINUTE;
2176     fields[2] = offset / MILLIS_PER_SECOND;
2177 
2178     U_ASSERT(fields[0] >= 0 && fields[0] <= MAX_OFFSET_HOUR);
2179     U_ASSERT(fields[1] >= 0 && fields[1] <= MAX_OFFSET_MINUTE);
2180     U_ASSERT(fields[2] >= 0 && fields[2] <= MAX_OFFSET_SECOND);
2181 
2182     int32_t lastIdx = maxFields;
2183     while (lastIdx > minFields) {
2184         if (fields[lastIdx] != 0) {
2185             break;
2186         }
2187         lastIdx--;
2188     }
2189 
2190     for (int32_t idx = 0; idx <= lastIdx; idx++) {
2191         if (sep && idx != 0) {
2192             result.append(sep);
2193         }
2194         result.append((char16_t)(0x0030 + fields[idx]/10));
2195         result.append((char16_t)(0x0030 + fields[idx]%10));
2196     }
2197 
2198     return result;
2199 }
2200 
2201 int32_t
parseAbuttingAsciiOffsetFields(const UnicodeString & text,ParsePosition & pos,OffsetFields minFields,OffsetFields maxFields,UBool fixedHourWidth)2202 TimeZoneFormat::parseAbuttingAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, OffsetFields minFields, OffsetFields maxFields, UBool fixedHourWidth) {
2203     int32_t start = pos.getIndex();
2204 
2205     int32_t minDigits = 2 * (minFields + 1) - (fixedHourWidth ? 0 : 1);
2206     int32_t maxDigits = 2 * (maxFields + 1);
2207 
2208     U_ASSERT(maxDigits <= MAX_OFFSET_DIGITS);
2209 
2210     int32_t digits[MAX_OFFSET_DIGITS] = {};
2211     int32_t numDigits = 0;
2212     int32_t idx = start;
2213     while (numDigits < maxDigits && idx < text.length()) {
2214         char16_t uch = text.charAt(idx);
2215         int32_t digit = DIGIT_VAL(uch);
2216         if (digit < 0) {
2217             break;
2218         }
2219         digits[numDigits] = digit;
2220         numDigits++;
2221         idx++;
2222     }
2223 
2224     if (fixedHourWidth && (numDigits & 1)) {
2225         // Fixed digits, so the number of digits must be even number. Truncating.
2226         numDigits--;
2227     }
2228 
2229     if (numDigits < minDigits) {
2230         pos.setErrorIndex(start);
2231         return 0;
2232     }
2233 
2234     int32_t hour = 0, min = 0, sec = 0;
2235     UBool bParsed = false;
2236     while (numDigits >= minDigits) {
2237         switch (numDigits) {
2238         case 1: //H
2239             hour = digits[0];
2240             break;
2241         case 2: //HH
2242             hour = digits[0] * 10 + digits[1];
2243             break;
2244         case 3: //Hmm
2245             hour = digits[0];
2246             min = digits[1] * 10 + digits[2];
2247             break;
2248         case 4: //HHmm
2249             hour = digits[0] * 10 + digits[1];
2250             min = digits[2] * 10 + digits[3];
2251             break;
2252         case 5: //Hmmss
2253             hour = digits[0];
2254             min = digits[1] * 10 + digits[2];
2255             sec = digits[3] * 10 + digits[4];
2256             break;
2257         case 6: //HHmmss
2258             hour = digits[0] * 10 + digits[1];
2259             min = digits[2] * 10 + digits[3];
2260             sec = digits[4] * 10 + digits[5];
2261             break;
2262         }
2263 
2264         if (hour <= MAX_OFFSET_HOUR && min <= MAX_OFFSET_MINUTE && sec <= MAX_OFFSET_SECOND) {
2265             // Successfully parsed
2266             bParsed = true;
2267             break;
2268         }
2269 
2270         // Truncating
2271         numDigits -= (fixedHourWidth ? 2 : 1);
2272         hour = min = sec = 0;
2273     }
2274 
2275     if (!bParsed) {
2276         pos.setErrorIndex(start);
2277         return 0;
2278     }
2279     pos.setIndex(start + numDigits);
2280     return ((((hour * 60) + min) * 60) + sec) * 1000;
2281 }
2282 
2283 int32_t
parseAsciiOffsetFields(const UnicodeString & text,ParsePosition & pos,char16_t sep,OffsetFields minFields,OffsetFields maxFields)2284 TimeZoneFormat::parseAsciiOffsetFields(const UnicodeString& text, ParsePosition& pos, char16_t sep, OffsetFields minFields, OffsetFields maxFields) {
2285     int32_t start = pos.getIndex();
2286     int32_t fieldVal[] = {0, 0, 0};
2287     int32_t fieldLen[] = {0, -1, -1};
2288     for (int32_t idx = start, fieldIdx = 0; idx < text.length() && fieldIdx <= maxFields; idx++) {
2289         char16_t c = text.charAt(idx);
2290         if (c == sep) {
2291             if (fieldIdx == 0) {
2292                 if (fieldLen[0] == 0) {
2293                     // no hours field
2294                     break;
2295                 }
2296                 // 1 digit hour, move to next field
2297             } else {
2298                 if (fieldLen[fieldIdx] != -1) {
2299                     // premature minute or seconds field
2300                     break;
2301                 }
2302                 fieldLen[fieldIdx] = 0;
2303             }
2304             continue;
2305         } else if (fieldLen[fieldIdx] == -1) {
2306             // no separator after 2 digit field
2307             break;
2308         }
2309         int32_t digit = DIGIT_VAL(c);
2310         if (digit < 0) {
2311             // not a digit
2312             break;
2313         }
2314         fieldVal[fieldIdx] = fieldVal[fieldIdx] * 10 + digit;
2315         fieldLen[fieldIdx]++;
2316         if (fieldLen[fieldIdx] >= 2) {
2317             // parsed 2 digits, move to next field
2318             fieldIdx++;
2319         }
2320     }
2321 
2322     int32_t offset = 0;
2323     int32_t parsedLen = 0;
2324     int32_t parsedFields = -1;
2325     do {
2326         // hour
2327         if (fieldLen[0] == 0) {
2328             break;
2329         }
2330         if (fieldVal[0] > MAX_OFFSET_HOUR) {
2331             offset = (fieldVal[0] / 10) * MILLIS_PER_HOUR;
2332             parsedFields = FIELDS_H;
2333             parsedLen = 1;
2334             break;
2335         }
2336         offset = fieldVal[0] * MILLIS_PER_HOUR;
2337         parsedLen = fieldLen[0];
2338         parsedFields = FIELDS_H;
2339 
2340         // minute
2341         if (fieldLen[1] != 2 || fieldVal[1] > MAX_OFFSET_MINUTE) {
2342             break;
2343         }
2344         offset += fieldVal[1] * MILLIS_PER_MINUTE;
2345         parsedLen += (1 + fieldLen[1]);
2346         parsedFields = FIELDS_HM;
2347 
2348         // second
2349         if (fieldLen[2] != 2 || fieldVal[2] > MAX_OFFSET_SECOND) {
2350             break;
2351         }
2352         offset += fieldVal[2] * MILLIS_PER_SECOND;
2353         parsedLen += (1 + fieldLen[2]);
2354         parsedFields = FIELDS_HMS;
2355     } while (false);
2356 
2357     if (parsedFields < minFields) {
2358         pos.setErrorIndex(start);
2359         return 0;
2360     }
2361 
2362     pos.setIndex(start + parsedLen);
2363     return offset;
2364 }
2365 
2366 void
appendOffsetDigits(UnicodeString & buf,int32_t n,uint8_t minDigits) const2367 TimeZoneFormat::appendOffsetDigits(UnicodeString& buf, int32_t n, uint8_t minDigits) const {
2368     U_ASSERT(n >= 0 && n < 60);
2369     int32_t numDigits = n >= 10 ? 2 : 1;
2370     for (int32_t i = 0; i < minDigits - numDigits; i++) {
2371         buf.append(fGMTOffsetDigits[0]);
2372     }
2373     if (numDigits == 2) {
2374         buf.append(fGMTOffsetDigits[n / 10]);
2375     }
2376     buf.append(fGMTOffsetDigits[n % 10]);
2377 }
2378 
2379 // ------------------------------------------------------------------
2380 // Private misc
2381 void
initGMTPattern(const UnicodeString & gmtPattern,UErrorCode & status)2382 TimeZoneFormat::initGMTPattern(const UnicodeString& gmtPattern, UErrorCode& status) {
2383     if (U_FAILURE(status)) {
2384         return;
2385     }
2386     // This implementation not perfect, but sufficient practically.
2387     int32_t idx = gmtPattern.indexOf(ARG0, ARG0_LEN, 0);
2388     if (idx < 0) {
2389         status = U_ILLEGAL_ARGUMENT_ERROR;
2390         return;
2391     }
2392     fGMTPattern.setTo(gmtPattern);
2393     unquote(gmtPattern.tempSubString(0, idx), fGMTPatternPrefix);
2394     unquote(gmtPattern.tempSubString(idx + ARG0_LEN), fGMTPatternSuffix);
2395 }
2396 
2397 UnicodeString&
unquote(const UnicodeString & pattern,UnicodeString & result)2398 TimeZoneFormat::unquote(const UnicodeString& pattern, UnicodeString& result) {
2399     if (pattern.indexOf(SINGLEQUOTE) < 0) {
2400         result.setTo(pattern);
2401         return result;
2402     }
2403     result.remove();
2404     UBool isPrevQuote = false;
2405     UBool inQuote = false;
2406     for (int32_t i = 0; i < pattern.length(); i++) {
2407         char16_t c = pattern.charAt(i);
2408         if (c == SINGLEQUOTE) {
2409             if (isPrevQuote) {
2410                 result.append(c);
2411                 isPrevQuote = false;
2412             } else {
2413                 isPrevQuote = true;
2414             }
2415             inQuote = !inQuote;
2416         } else {
2417             isPrevQuote = false;
2418             result.append(c);
2419         }
2420     }
2421     return result;
2422 }
2423 
2424 UVector*
parseOffsetPattern(const UnicodeString & pattern,OffsetFields required,UErrorCode & status)2425 TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields required, UErrorCode& status) {
2426     if (U_FAILURE(status)) {
2427         return nullptr;
2428     }
2429     UVector* result = new UVector(deleteGMTOffsetField, nullptr, status);
2430     if (result == nullptr) {
2431         status = U_MEMORY_ALLOCATION_ERROR;
2432         return nullptr;
2433     }
2434 
2435     int32_t checkBits = 0;
2436     UBool isPrevQuote = false;
2437     UBool inQuote = false;
2438     char16_t textBuf[32];
2439     UnicodeString text(textBuf, 0, UPRV_LENGTHOF(textBuf));
2440     GMTOffsetField::FieldType itemType = GMTOffsetField::TEXT;
2441     int32_t itemLength = 1;
2442 
2443     for (int32_t i = 0; i < pattern.length(); i++) {
2444         char16_t ch = pattern.charAt(i);
2445         if (ch == SINGLEQUOTE) {
2446             if (isPrevQuote) {
2447                 text.append(SINGLEQUOTE);
2448                 isPrevQuote = false;
2449             } else {
2450                 isPrevQuote = true;
2451                 if (itemType != GMTOffsetField::TEXT) {
2452                     if (GMTOffsetField::isValid(itemType, itemLength)) {
2453                         GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status);
2454                         result->adoptElement(fld, status);
2455                         if (U_FAILURE(status)) {
2456                             break;
2457                         }
2458                     } else {
2459                         status = U_ILLEGAL_ARGUMENT_ERROR;
2460                         break;
2461                     }
2462                     itemType = GMTOffsetField::TEXT;
2463                 }
2464             }
2465             inQuote = !inQuote;
2466         } else {
2467             isPrevQuote = false;
2468             if (inQuote) {
2469                 text.append(ch);
2470             } else {
2471                 GMTOffsetField::FieldType tmpType = GMTOffsetField::getTypeByLetter(ch);
2472                 if (tmpType != GMTOffsetField::TEXT) {
2473                     // an offset time pattern character
2474                     if (tmpType == itemType) {
2475                         itemLength++;
2476                     } else {
2477                         if (itemType == GMTOffsetField::TEXT) {
2478                             if (text.length() > 0) {
2479                                 GMTOffsetField* textfld = GMTOffsetField::createText(text, status);
2480                                 result->adoptElement(textfld, status);
2481                                 if (U_FAILURE(status)) {
2482                                     break;
2483                                 }
2484                                 text.remove();
2485                             }
2486                         } else {
2487                             if (GMTOffsetField::isValid(itemType, itemLength)) {
2488                                 GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status);
2489                                 result->adoptElement(fld, status);
2490                                 if (U_FAILURE(status)) {
2491                                     break;
2492                                 }
2493                             } else {
2494                                 status = U_ILLEGAL_ARGUMENT_ERROR;
2495                                 break;
2496                             }
2497                         }
2498                         itemType = tmpType;
2499                         itemLength = 1;
2500                         checkBits |= tmpType;
2501                     }
2502                 } else {
2503                     // a string literal
2504                     if (itemType != GMTOffsetField::TEXT) {
2505                         if (GMTOffsetField::isValid(itemType, itemLength)) {
2506                             GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status);
2507                             result->adoptElement(fld, status);
2508                             if (U_FAILURE(status)) {
2509                                 break;
2510                             }
2511                         } else {
2512                             status = U_ILLEGAL_ARGUMENT_ERROR;
2513                             break;
2514                         }
2515                         itemType = GMTOffsetField::TEXT;
2516                     }
2517                     text.append(ch);
2518                 }
2519             }
2520         }
2521     }
2522     // handle last item
2523     if (U_SUCCESS(status)) {
2524         if (itemType == GMTOffsetField::TEXT) {
2525             if (text.length() > 0) {
2526                 GMTOffsetField* tfld = GMTOffsetField::createText(text, status);
2527                 result->adoptElement(tfld, status);
2528             }
2529         } else {
2530             if (GMTOffsetField::isValid(itemType, itemLength)) {
2531                 GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast<uint8_t>(itemLength), status);
2532                 result->adoptElement(fld, status);
2533             } else {
2534                 status = U_ILLEGAL_ARGUMENT_ERROR;
2535             }
2536         }
2537 
2538         // Check all required fields are set
2539         if (U_SUCCESS(status)) {
2540             int32_t reqBits = 0;
2541             switch (required) {
2542             case FIELDS_H:
2543                 reqBits = GMTOffsetField::HOUR;
2544                 break;
2545             case FIELDS_HM:
2546                 reqBits = GMTOffsetField::HOUR | GMTOffsetField::MINUTE;
2547                 break;
2548             case FIELDS_HMS:
2549                 reqBits = GMTOffsetField::HOUR | GMTOffsetField::MINUTE | GMTOffsetField::SECOND;
2550                 break;
2551             }
2552             if (checkBits == reqBits) {
2553                 // all required fields are set, no extra fields
2554                 return result;
2555             }
2556         }
2557     }
2558 
2559     // error
2560     delete result;
2561     return nullptr;
2562 }
2563 
2564 UnicodeString&
expandOffsetPattern(const UnicodeString & offsetHM,UnicodeString & result,UErrorCode & status)2565 TimeZoneFormat::expandOffsetPattern(const UnicodeString& offsetHM, UnicodeString& result, UErrorCode& status) {
2566     result.setToBogus();
2567     if (U_FAILURE(status)) {
2568         return result;
2569     }
2570     U_ASSERT(u_strlen(DEFAULT_GMT_OFFSET_MINUTE_PATTERN) == 2);
2571 
2572     int32_t idx_mm = offsetHM.indexOf(DEFAULT_GMT_OFFSET_MINUTE_PATTERN, 2, 0);
2573     if (idx_mm < 0) {
2574         // Bad time zone hour pattern data
2575         status = U_ILLEGAL_ARGUMENT_ERROR;
2576         return result;
2577     }
2578 
2579     UnicodeString sep;
2580     int32_t idx_H = offsetHM.tempSubString(0, idx_mm).lastIndexOf((char16_t)0x0048 /* H */);
2581     if (idx_H >= 0) {
2582         sep = offsetHM.tempSubString(idx_H + 1, idx_mm - (idx_H + 1));
2583     }
2584     result.setTo(offsetHM.tempSubString(0, idx_mm + 2));
2585     result.append(sep);
2586     result.append(DEFAULT_GMT_OFFSET_SECOND_PATTERN, -1);
2587     result.append(offsetHM.tempSubString(idx_mm + 2));
2588     return result;
2589 }
2590 
2591 UnicodeString&
truncateOffsetPattern(const UnicodeString & offsetHM,UnicodeString & result,UErrorCode & status)2592 TimeZoneFormat::truncateOffsetPattern(const UnicodeString& offsetHM, UnicodeString& result, UErrorCode& status) {
2593     result.setToBogus();
2594     if (U_FAILURE(status)) {
2595         return result;
2596     }
2597     U_ASSERT(u_strlen(DEFAULT_GMT_OFFSET_MINUTE_PATTERN) == 2);
2598 
2599     int32_t idx_mm = offsetHM.indexOf(DEFAULT_GMT_OFFSET_MINUTE_PATTERN, 2, 0);
2600     if (idx_mm < 0) {
2601         // Bad time zone hour pattern data
2602         status = U_ILLEGAL_ARGUMENT_ERROR;
2603         return result;
2604     }
2605     char16_t HH[] = {0x0048, 0x0048};
2606     int32_t idx_HH = offsetHM.tempSubString(0, idx_mm).lastIndexOf(HH, 2, 0);
2607     if (idx_HH >= 0) {
2608         return result.setTo(offsetHM.tempSubString(0, idx_HH + 2));
2609     }
2610     int32_t idx_H = offsetHM.tempSubString(0, idx_mm).lastIndexOf((char16_t)0x0048, 0);
2611     if (idx_H >= 0) {
2612         return result.setTo(offsetHM.tempSubString(0, idx_H + 1));
2613     }
2614     // Bad time zone hour pattern data
2615     status = U_ILLEGAL_ARGUMENT_ERROR;
2616     return result;
2617 }
2618 
2619 void
initGMTOffsetPatterns(UErrorCode & status)2620 TimeZoneFormat::initGMTOffsetPatterns(UErrorCode& status) {
2621     for (int32_t type = 0; type < UTZFMT_PAT_COUNT; type++) {
2622         switch (type) {
2623         case UTZFMT_PAT_POSITIVE_H:
2624         case UTZFMT_PAT_NEGATIVE_H:
2625             fGMTOffsetPatternItems[type] = parseOffsetPattern(fGMTOffsetPatterns[type], FIELDS_H, status);
2626             break;
2627         case UTZFMT_PAT_POSITIVE_HM:
2628         case UTZFMT_PAT_NEGATIVE_HM:
2629             fGMTOffsetPatternItems[type] = parseOffsetPattern(fGMTOffsetPatterns[type], FIELDS_HM, status);
2630             break;
2631         case UTZFMT_PAT_POSITIVE_HMS:
2632         case UTZFMT_PAT_NEGATIVE_HMS:
2633             fGMTOffsetPatternItems[type] = parseOffsetPattern(fGMTOffsetPatterns[type], FIELDS_HMS, status);
2634             break;
2635         }
2636     }
2637     if (U_FAILURE(status)) {
2638         return;
2639     }
2640     checkAbuttingHoursAndMinutes();
2641 }
2642 
2643 void
checkAbuttingHoursAndMinutes()2644 TimeZoneFormat::checkAbuttingHoursAndMinutes() {
2645     fAbuttingOffsetHoursAndMinutes= false;
2646     for (int32_t type = 0; type < UTZFMT_PAT_COUNT; type++) {
2647         UBool afterH = false;
2648         UVector *items = fGMTOffsetPatternItems[type];
2649         for (int32_t i = 0; i < items->size(); i++) {
2650             const GMTOffsetField* item = (GMTOffsetField*)items->elementAt(i);
2651             GMTOffsetField::FieldType fieldType = item->getType();
2652             if (fieldType != GMTOffsetField::TEXT) {
2653                 if (afterH) {
2654                     fAbuttingOffsetHoursAndMinutes = true;
2655                     break;
2656                 } else if (fieldType == GMTOffsetField::HOUR) {
2657                     afterH = true;
2658                 }
2659             } else if (afterH) {
2660                 break;
2661             }
2662         }
2663         if (fAbuttingOffsetHoursAndMinutes) {
2664             break;
2665         }
2666     }
2667 }
2668 
2669 UBool
toCodePoints(const UnicodeString & str,UChar32 * codeArray,int32_t size)2670 TimeZoneFormat::toCodePoints(const UnicodeString& str, UChar32* codeArray, int32_t size) {
2671     int32_t count = str.countChar32();
2672     if (count != size) {
2673         return false;
2674     }
2675 
2676     for (int32_t idx = 0, start = 0; idx < size; idx++) {
2677         codeArray[idx] = str.char32At(start);
2678         start = str.moveIndex32(start, 1);
2679     }
2680 
2681     return true;
2682 }
2683 
2684 TimeZone*
createTimeZoneForOffset(int32_t offset) const2685 TimeZoneFormat::createTimeZoneForOffset(int32_t offset) const {
2686     if (offset == 0) {
2687         // when offset is 0, we should use "Etc/GMT"
2688         return TimeZone::createTimeZone(UnicodeString(true, TZID_GMT, -1));
2689     }
2690     return ZoneMeta::createCustomTimeZone(offset);
2691 }
2692 
2693 UTimeZoneFormatTimeType
getTimeType(UTimeZoneNameType nameType)2694 TimeZoneFormat::getTimeType(UTimeZoneNameType nameType) {
2695     switch (nameType) {
2696     case UTZNM_LONG_STANDARD:
2697     case UTZNM_SHORT_STANDARD:
2698         return UTZFMT_TIME_TYPE_STANDARD;
2699 
2700     case UTZNM_LONG_DAYLIGHT:
2701     case UTZNM_SHORT_DAYLIGHT:
2702         return UTZFMT_TIME_TYPE_DAYLIGHT;
2703 
2704     default:
2705         return UTZFMT_TIME_TYPE_UNKNOWN;
2706     }
2707 }
2708 
2709 UnicodeString&
getTimeZoneID(const TimeZoneNames::MatchInfoCollection * matches,int32_t idx,UnicodeString & tzID) const2710 TimeZoneFormat::getTimeZoneID(const TimeZoneNames::MatchInfoCollection* matches, int32_t idx, UnicodeString& tzID) const {
2711     if (!matches->getTimeZoneIDAt(idx, tzID)) {
2712         char16_t mzIDBuf[32];
2713         UnicodeString mzID(mzIDBuf, 0, UPRV_LENGTHOF(mzIDBuf));
2714         if (matches->getMetaZoneIDAt(idx, mzID)) {
2715             fTimeZoneNames->getReferenceZoneID(mzID, fTargetRegion, tzID);
2716         }
2717     }
2718     return tzID;
2719 }
2720 
2721 
2722 class ZoneIdMatchHandler : public TextTrieMapSearchResultHandler {
2723 public:
2724     ZoneIdMatchHandler();
2725     virtual ~ZoneIdMatchHandler();
2726 
2727     UBool handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status) override;
2728     const char16_t* getID();
2729     int32_t getMatchLen();
2730 private:
2731     int32_t fLen;
2732     const char16_t* fID;
2733 };
2734 
ZoneIdMatchHandler()2735 ZoneIdMatchHandler::ZoneIdMatchHandler()
2736 : fLen(0), fID(nullptr) {
2737 }
2738 
~ZoneIdMatchHandler()2739 ZoneIdMatchHandler::~ZoneIdMatchHandler() {
2740 }
2741 
2742 UBool
handleMatch(int32_t matchLength,const CharacterNode * node,UErrorCode & status)2743 ZoneIdMatchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, UErrorCode &status) {
2744     if (U_FAILURE(status)) {
2745         return false;
2746     }
2747     if (node->hasValues()) {
2748         const char16_t* id = (const char16_t*)node->getValue(0);
2749         if (id != nullptr) {
2750             if (fLen < matchLength) {
2751                 fID = id;
2752                 fLen = matchLength;
2753             }
2754         }
2755     }
2756     return true;
2757 }
2758 
2759 const char16_t*
getID()2760 ZoneIdMatchHandler::getID() {
2761     return fID;
2762 }
2763 
2764 int32_t
getMatchLen()2765 ZoneIdMatchHandler::getMatchLen() {
2766     return fLen;
2767 }
2768 
2769 
initZoneIdTrie(UErrorCode & status)2770 static void U_CALLCONV initZoneIdTrie(UErrorCode &status) {
2771     U_ASSERT(gZoneIdTrie == nullptr);
2772     ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONEFORMAT, tzfmt_cleanup);
2773     gZoneIdTrie = new TextTrieMap(true, nullptr);    // No deleter, because values are pooled by ZoneMeta
2774     if (gZoneIdTrie == nullptr) {
2775         status = U_MEMORY_ALLOCATION_ERROR;
2776         return;
2777     }
2778     StringEnumeration *tzenum = TimeZone::createEnumeration(status);
2779     if (U_SUCCESS(status)) {
2780         const UnicodeString *id;
2781         while ((id = tzenum->snext(status)) != nullptr) {
2782             const char16_t* uid = ZoneMeta::findTimeZoneID(*id);
2783             if (uid) {
2784                 gZoneIdTrie->put(uid, const_cast<char16_t *>(uid), status);
2785             }
2786         }
2787         delete tzenum;
2788     }
2789 }
2790 
2791 
2792 UnicodeString&
parseZoneID(const UnicodeString & text,ParsePosition & pos,UnicodeString & tzID) const2793 TimeZoneFormat::parseZoneID(const UnicodeString& text, ParsePosition& pos, UnicodeString& tzID) const {
2794     UErrorCode status = U_ZERO_ERROR;
2795     umtx_initOnce(gZoneIdTrieInitOnce, &initZoneIdTrie, status);
2796 
2797     int32_t start = pos.getIndex();
2798     int32_t len = 0;
2799     tzID.setToBogus();
2800 
2801     if (U_SUCCESS(status)) {
2802         LocalPointer<ZoneIdMatchHandler> handler(new ZoneIdMatchHandler());
2803         gZoneIdTrie->search(text, start, handler.getAlias(), status);
2804         len = handler->getMatchLen();
2805         if (len > 0) {
2806             tzID.setTo(handler->getID(), -1);
2807         }
2808     }
2809 
2810     if (len > 0) {
2811         pos.setIndex(start + len);
2812     } else {
2813         pos.setErrorIndex(start);
2814     }
2815 
2816     return tzID;
2817 }
2818 
initShortZoneIdTrie(UErrorCode & status)2819 static void U_CALLCONV initShortZoneIdTrie(UErrorCode &status) {
2820     U_ASSERT(gShortZoneIdTrie == nullptr);
2821     ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONEFORMAT, tzfmt_cleanup);
2822     StringEnumeration *tzenum = TimeZone::createTimeZoneIDEnumeration(UCAL_ZONE_TYPE_CANONICAL, nullptr, nullptr, status);
2823     if (U_SUCCESS(status)) {
2824         gShortZoneIdTrie = new TextTrieMap(true, nullptr);    // No deleter, because values are pooled by ZoneMeta
2825         if (gShortZoneIdTrie == nullptr) {
2826             status = U_MEMORY_ALLOCATION_ERROR;
2827         } else {
2828             const UnicodeString *id;
2829             while ((id = tzenum->snext(status)) != nullptr) {
2830                 const char16_t* uID = ZoneMeta::findTimeZoneID(*id);
2831                 const char16_t* shortID = ZoneMeta::getShortID(*id);
2832                 if (shortID && uID) {
2833                     gShortZoneIdTrie->put(shortID, const_cast<char16_t *>(uID), status);
2834                 }
2835             }
2836         }
2837     }
2838     delete tzenum;
2839 }
2840 
2841 
2842 UnicodeString&
parseShortZoneID(const UnicodeString & text,ParsePosition & pos,UnicodeString & tzID) const2843 TimeZoneFormat::parseShortZoneID(const UnicodeString& text, ParsePosition& pos, UnicodeString& tzID) const {
2844     UErrorCode status = U_ZERO_ERROR;
2845     umtx_initOnce(gShortZoneIdTrieInitOnce, &initShortZoneIdTrie, status);
2846 
2847     int32_t start = pos.getIndex();
2848     int32_t len = 0;
2849     tzID.setToBogus();
2850 
2851     if (U_SUCCESS(status)) {
2852         LocalPointer<ZoneIdMatchHandler> handler(new ZoneIdMatchHandler());
2853         gShortZoneIdTrie->search(text, start, handler.getAlias(), status);
2854         len = handler->getMatchLen();
2855         if (len > 0) {
2856             tzID.setTo(handler->getID(), -1);
2857         }
2858     }
2859 
2860     if (len > 0) {
2861         pos.setIndex(start + len);
2862     } else {
2863         pos.setErrorIndex(start);
2864     }
2865 
2866     return tzID;
2867 }
2868 
2869 
2870 UnicodeString&
parseExemplarLocation(const UnicodeString & text,ParsePosition & pos,UnicodeString & tzID) const2871 TimeZoneFormat::parseExemplarLocation(const UnicodeString& text, ParsePosition& pos, UnicodeString& tzID) const {
2872     int32_t startIdx = pos.getIndex();
2873     int32_t parsedPos = -1;
2874     tzID.setToBogus();
2875 
2876     UErrorCode status = U_ZERO_ERROR;
2877     LocalPointer<TimeZoneNames::MatchInfoCollection> exemplarMatches(fTimeZoneNames->find(text, startIdx, UTZNM_EXEMPLAR_LOCATION, status));
2878     if (U_FAILURE(status)) {
2879         pos.setErrorIndex(startIdx);
2880         return tzID;
2881     }
2882     int32_t matchIdx = -1;
2883     if (!exemplarMatches.isNull()) {
2884         for (int32_t i = 0; i < exemplarMatches->size(); i++) {
2885             if (startIdx + exemplarMatches->getMatchLengthAt(i) > parsedPos) {
2886                 matchIdx = i;
2887                 parsedPos = startIdx + exemplarMatches->getMatchLengthAt(i);
2888             }
2889         }
2890         if (parsedPos > 0) {
2891             pos.setIndex(parsedPos);
2892             getTimeZoneID(exemplarMatches.getAlias(), matchIdx, tzID);
2893         }
2894     }
2895 
2896     if (tzID.length() == 0) {
2897         pos.setErrorIndex(startIdx);
2898     }
2899 
2900     return tzID;
2901 }
2902 
2903 U_NAMESPACE_END
2904 
2905 #endif
2906