1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2016, International Business Machines Corporation and *
6 * others. All Rights Reserved. *
7 *******************************************************************************
8 *
9 * File SMPDTFMT.CPP
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 02/19/97 aliu Converted from java.
15 * 03/31/97 aliu Modified extensively to work with 50 locales.
16 * 04/01/97 aliu Added support for centuries.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 07/21/98 stephen Added initializeDefaultCentury.
19 * Removed getZoneIndex (added in DateFormatSymbols)
20 * Removed subParseLong
21 * Removed chk
22 * 02/22/99 stephen Removed character literals for EBCDIC safety
23 * 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru
24 * "99" are recognized. {j28 4182066}
25 * 11/15/99 weiv Added support for week of year/day of week format
26 ********************************************************************************
27 */
28
29 #define ZID_KEY_MAX 128
30
31 #include "unicode/utypes.h"
32
33 #if !UCONFIG_NO_FORMATTING
34 #include "unicode/smpdtfmt.h"
35 #include "unicode/dtfmtsym.h"
36 #include "unicode/ures.h"
37 #include "unicode/msgfmt.h"
38 #include "unicode/calendar.h"
39 #include "unicode/gregocal.h"
40 #include "unicode/timezone.h"
41 #include "unicode/decimfmt.h"
42 #include "unicode/dcfmtsym.h"
43 #include "unicode/uchar.h"
44 #include "unicode/uniset.h"
45 #include "unicode/ustring.h"
46 #include "unicode/basictz.h"
47 #include "unicode/simpleformatter.h"
48 #include "unicode/simplenumberformatter.h"
49 #include "unicode/simpletz.h"
50 #include "unicode/rbtz.h"
51 #include "unicode/tzfmt.h"
52 #include "unicode/ucasemap.h"
53 #include "unicode/utf16.h"
54 #include "unicode/vtzone.h"
55 #include "unicode/udisplaycontext.h"
56 #include "unicode/brkiter.h"
57 #include "unicode/rbnf.h"
58 #include "unicode/dtptngen.h"
59 #include "uresimp.h"
60 #include "olsontz.h"
61 #include "patternprops.h"
62 #include "fphdlimp.h"
63 #include "hebrwcal.h"
64 #include "cstring.h"
65 #include "uassert.h"
66 #include "cmemory.h"
67 #include "umutex.h"
68 #include "mutex.h"
69 #include <float.h>
70 #include "smpdtfst.h"
71 #include "sharednumberformat.h"
72 #include "ucasemap_imp.h"
73 #include "ustr_imp.h"
74 #include "charstr.h"
75 #include "uvector.h"
76 #include "cstr.h"
77 #include "dayperiodrules.h"
78 #include "tznames_impl.h" // ZONE_NAME_U16_MAX
79 #include "number_utypes.h"
80
81 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
82 #include <stdio.h>
83 #endif
84
85 // *****************************************************************************
86 // class SimpleDateFormat
87 // *****************************************************************************
88
89 U_NAMESPACE_BEGIN
90
91 /**
92 * Last-resort string to use for "GMT" when constructing time zone strings.
93 */
94 // For time zones that have no names, use strings GMT+minutes and
95 // GMT-minutes. For instance, in France the time zone is GMT+60.
96 // Also accepted are GMT+H:MM or GMT-H:MM.
97 // Currently not being used
98 //static const char16_t gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT"
99 //static const char16_t gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
100 //static const char16_t gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
101 //static const char16_t gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
102 //static const char16_t gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
103 //static const char16_t gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
104 //static const char16_t gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
105 //static const char16_t gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
106 //static const char16_t gUt[] = {0x0055, 0x0054, 0x0000}; // "UT"
//static const char16_t gUtc[] = {0x0055, 0x0054, 0x0043, 0x0000}; // "UTC"
108
// Lengths, in UTF-16 code units and excluding the terminator, of the
// GMT-related strings kept (commented out) above.
enum GmtPatSize {
    kGmtLen = 3,
    kGmtPatLen = 6,
    kNegHmsLen = 9,
    kNegHmLen = 6,
    kPosHmsLen = 9,
    kPosHmLen = 6,
    kUtLen = 2,
    kUtcLen = 3
};
119
// Support for numbering-system overrides.
//
// Selects which group of fields an override string is applied to.
enum OvrStrType {
    kOvrStrDate = 0,   // date fields only
    kOvrStrTime = 1,   // time fields only
    kOvrStrBoth = 2    // both date and time fields
};
127
128 static const UDateFormatField kDateFields[] = {
129 UDAT_YEAR_FIELD,
130 UDAT_MONTH_FIELD,
131 UDAT_DATE_FIELD,
132 UDAT_DAY_OF_YEAR_FIELD,
133 UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
134 UDAT_WEEK_OF_YEAR_FIELD,
135 UDAT_WEEK_OF_MONTH_FIELD,
136 UDAT_YEAR_WOY_FIELD,
137 UDAT_EXTENDED_YEAR_FIELD,
138 UDAT_JULIAN_DAY_FIELD,
139 UDAT_STANDALONE_DAY_FIELD,
140 UDAT_STANDALONE_MONTH_FIELD,
141 UDAT_QUARTER_FIELD,
142 UDAT_STANDALONE_QUARTER_FIELD,
143 UDAT_YEAR_NAME_FIELD,
144 UDAT_RELATED_YEAR_FIELD };
145 static const int8_t kDateFieldsCount = 16;
146
147 static const UDateFormatField kTimeFields[] = {
148 UDAT_HOUR_OF_DAY1_FIELD,
149 UDAT_HOUR_OF_DAY0_FIELD,
150 UDAT_MINUTE_FIELD,
151 UDAT_SECOND_FIELD,
152 UDAT_FRACTIONAL_SECOND_FIELD,
153 UDAT_HOUR1_FIELD,
154 UDAT_HOUR0_FIELD,
155 UDAT_MILLISECONDS_IN_DAY_FIELD,
156 UDAT_TIMEZONE_RFC_FIELD,
157 UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD };
158 static const int8_t kTimeFieldsCount = 10;
159
160
// Pattern of last resort, used when no usable pattern can be loaded from a
// resource. NUL-terminated UTF-16: 13 pattern characters plus the terminator.
static const char16_t gDefaultPattern[] = u"yMMdd hh:mm a";
167
// A prefix chosen so that it NEVER MATCHES real text; it exists to suppress
// the parsing of negative numbers. Pick a different code point if U+AB00
// ever becomes valid Unicode.
static const char16_t SUPPRESS_NEGATIVE_PREFIX[] = u"\uAB00";
172
/**
 * The ASCII apostrophe (U+0027).
 * NOTE(review): presumably the delimiter for literal text inside date
 * patterns; its uses are outside this part of the file -- confirm.
 */
static const char16_t QUOTE = 0x27; // Single quote
178
/*
 * Range-check bias, one entry per UDateFormatField (in field order).
 *
 * The bias is added to a field's minimum and maximum values before they are
 * compared with the parsed number. Example: the calendar stores months
 * zero-based while parsed month numbers start at 1, so the month bias is 1.
 *
 * An entry of -1 disables the range check for that field.
 */
static const int32_t gFieldRangeBias[] = {
    -1,  // 'G' - UDAT_ERA_FIELD
    -1,  // 'y' - UDAT_YEAR_FIELD
     1,  // 'M' - UDAT_MONTH_FIELD
     0,  // 'd' - UDAT_DATE_FIELD
    -1,  // 'k' - UDAT_HOUR_OF_DAY1_FIELD
    -1,  // 'H' - UDAT_HOUR_OF_DAY0_FIELD
     0,  // 'm' - UDAT_MINUTE_FIELD
     0,  // 's' - UDAT_SECOND_FIELD
    -1,  // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
    -1,  // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
    -1,  // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
    -1,  // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
    -1,  // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
    -1,  // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
    -1,  // 'a' - UDAT_AM_PM_FIELD
    -1,  // 'h' - UDAT_HOUR1_FIELD
    -1,  // 'K' - UDAT_HOUR0_FIELD
    -1,  // 'z' - UDAT_TIMEZONE_FIELD
    -1,  // 'Y' - UDAT_YEAR_WOY_FIELD
    -1,  // 'e' - UDAT_DOW_LOCAL_FIELD
    -1,  // 'u' - UDAT_EXTENDED_YEAR_FIELD
    -1,  // 'g' - UDAT_JULIAN_DAY_FIELD
    -1,  // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
    -1,  // 'Z' - UDAT_TIMEZONE_RFC_FIELD
    -1,  // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
     0,  // 'c' - UDAT_STANDALONE_DAY_FIELD
     1,  // 'L' - UDAT_STANDALONE_MONTH_FIELD
    -1,  // 'Q' - UDAT_QUARTER_FIELD (1-4?)
    -1,  // 'q' - UDAT_STANDALONE_QUARTER_FIELD
    -1,  // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
    -1,  // 'U' - UDAT_YEAR_NAME_FIELD
    -1,  // 'O' - UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD
    -1,  // 'X' - UDAT_TIMEZONE_ISO_FIELD
    -1,  // 'x' - UDAT_TIMEZONE_ISO_LOCAL_FIELD
    -1,  // 'r' - UDAT_RELATED_YEAR_FIELD
    // UDAT_TIME_SEPARATOR_FIELD: its pattern character is ':' when
    // UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR is set, otherwise it has none.
    // The bias is -1 in both configurations.
    -1,
};
230
// When calendar uses hebr numbering (i.e. he@calendar=hebrew),
// offset the years within the current millennium down to 1-999.
// The two constants below are the year bounds (5000 and 6000) of that
// millennium.
// NOTE(review): whether each bound is inclusive or exclusive is decided at
// the use sites, which are not in this part of the file.
static const int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000;
static const int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR = 6000;
235
/**
 * Maximum range for detecting daylight offset of a time zone when parsed time zone
 * string indicates it's daylight saving time, but the detected time zone does not
 * observe daylight saving time at the parsed date.
 *
 * The value is 30 * 365 days expressed as days*hours*minutes*seconds*1000.0,
 * i.e. thirty 365-day years (presumably in milliseconds, matching UDate --
 * confirm at the use site). The integer part of the product (946,080,000)
 * fits in a 32-bit int; the final factor promotes the result to double.
 */
static const double MAX_DAYLIGHT_DETECTION_RANGE = 30*365*24*60*60*1000.0;
242
// File-scope mutex. In operator= below it guards the read of another
// formatter's lazily created fTimeZoneFormat pointer.
static UMutex LOCK;

// NOTE(review): presumably expands to the class-ID (ICU "poor man's RTTI")
// definitions for SimpleDateFormat; the macro is defined outside this file.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
246
247 SimpleDateFormat::NSOverride::~NSOverride() {
248 if (snf != nullptr) {
249 snf->removeRef();
250 }
251 }
252
253
free()254 void SimpleDateFormat::NSOverride::free() {
255 NSOverride *cur = this;
256 while (cur) {
257 NSOverride *next_temp = cur->next;
258 delete cur;
259 cur = next_temp;
260 }
261 }
262
263 // no matter what the locale's default number format looked like, we want
264 // to modify it so that it doesn't use thousands separators, doesn't always
265 // show the decimal point, and recognizes integers only when parsing
fixNumberFormatForDates(NumberFormat & nf)266 static void fixNumberFormatForDates(NumberFormat &nf) {
267 nf.setGroupingUsed(false);
268 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(&nf);
269 if (decfmt != nullptr) {
270 decfmt->setDecimalSeparatorAlwaysShown(false);
271 }
272 nf.setParseIntegerOnly(true);
273 nf.setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
274 }
275
createSharedNumberFormat(NumberFormat * nfToAdopt)276 static const SharedNumberFormat *createSharedNumberFormat(
277 NumberFormat *nfToAdopt) {
278 fixNumberFormatForDates(*nfToAdopt);
279 const SharedNumberFormat *result = new SharedNumberFormat(nfToAdopt);
280 if (result == nullptr) {
281 delete nfToAdopt;
282 }
283 return result;
284 }
285
createSharedNumberFormat(const Locale & loc,UErrorCode & status)286 static const SharedNumberFormat *createSharedNumberFormat(
287 const Locale &loc, UErrorCode &status) {
288 NumberFormat *nf = NumberFormat::createInstance(loc, status);
289 if (U_FAILURE(status)) {
290 return nullptr;
291 }
292 const SharedNumberFormat *result = createSharedNumberFormat(nf);
293 if (result == nullptr) {
294 status = U_MEMORY_ALLOCATION_ERROR;
295 }
296 return result;
297 }
298
allocSharedNumberFormatters()299 static const SharedNumberFormat **allocSharedNumberFormatters() {
300 const SharedNumberFormat **result = (const SharedNumberFormat**)
301 uprv_malloc(UDAT_FIELD_COUNT * sizeof(const SharedNumberFormat*));
302 if (result == nullptr) {
303 return nullptr;
304 }
305 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
306 result[i] = nullptr;
307 }
308 return result;
309 }
310
freeSharedNumberFormatters(const SharedNumberFormat ** list)311 static void freeSharedNumberFormatters(const SharedNumberFormat ** list) {
312 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
313 SharedObject::clearPtr(list[i]);
314 }
315 uprv_free(list);
316 }
317
getNumberFormatByIndex(UDateFormatField index) const318 const NumberFormat *SimpleDateFormat::getNumberFormatByIndex(
319 UDateFormatField index) const {
320 if (fSharedNumberFormatters == nullptr ||
321 fSharedNumberFormatters[index] == nullptr) {
322 return fNumberFormat;
323 }
324 return &(**fSharedNumberFormatters[index]);
325 }
326
327 //----------------------------------------------------------------------
328
~SimpleDateFormat()329 SimpleDateFormat::~SimpleDateFormat()
330 {
331 delete fSymbols;
332 if (fSharedNumberFormatters) {
333 freeSharedNumberFormatters(fSharedNumberFormatters);
334 }
335 if (fTimeZoneFormat) {
336 delete fTimeZoneFormat;
337 }
338 delete fSimpleNumberFormatter;
339
340 #if !UCONFIG_NO_BREAK_ITERATION
341 delete fCapitalizationBrkIter;
342 #endif
343 }
344
345 //----------------------------------------------------------------------
346
SimpleDateFormat(UErrorCode & status)347 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
348 : fLocale(Locale::getDefault())
349 {
350 initializeBooleanAttributes();
351 construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
352 initializeDefaultCentury();
353 }
354
355 //----------------------------------------------------------------------
356
// Builds a formatter for an explicit pattern in the default locale.
// No numbering-system override is used: both override strings are marked
// bogus. Errors are reported through `status`.
SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
                                   UErrorCode &status)
:   fPattern(pattern),
    fLocale(Locale::getDefault())
{
    fDateOverride.setToBogus();
    fTimeOverride.setToBogus();
    initializeBooleanAttributes();
    // The calendar is created first, then the locale's symbols, then the
    // remaining state via initialize().
    initializeCalendar(nullptr,fLocale,status);
    fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
    initialize(fLocale, status);
    initializeDefaultCentury();

}
371 //----------------------------------------------------------------------
372
// Builds a formatter for an explicit pattern in the default locale, with a
// numbering-system override string. The override is stored as the date
// override (the time override stays bogus) and is then handed to
// processOverrideString() with kOvrStrBoth. Errors are reported through
// `status`.
SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
                                   const UnicodeString& override,
                                   UErrorCode &status)
:   fPattern(pattern),
    fLocale(Locale::getDefault())
{
    fDateOverride.setTo(override);
    fTimeOverride.setToBogus();
    initializeBooleanAttributes();
    initializeCalendar(nullptr,fLocale,status);
    fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
    initialize(fLocale, status);
    initializeDefaultCentury();

    // Runs after initialize(), once the rest of the formatter is set up.
    processOverrideString(fLocale,override,kOvrStrBoth,status);

}
390
391 //----------------------------------------------------------------------
392
// Builds a formatter for an explicit pattern in the given locale.
// No numbering-system override is used: both override strings are marked
// bogus. Errors are reported through `status`.
SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
                                   const Locale& locale,
                                   UErrorCode& status)
:   fPattern(pattern),
    fLocale(locale)
{

    fDateOverride.setToBogus();
    fTimeOverride.setToBogus();
    initializeBooleanAttributes();

    // Calendar first, then the locale's symbols, then the remaining state.
    initializeCalendar(nullptr,fLocale,status);
    fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
    initialize(fLocale, status);
    initializeDefaultCentury();
}
409
410 //----------------------------------------------------------------------
411
// Builds a formatter for an explicit pattern in the given locale, with a
// numbering-system override string. The override is stored as the date
// override (the time override stays bogus) and is then handed to
// processOverrideString() with kOvrStrBoth. Errors are reported through
// `status`.
SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
                                   const UnicodeString& override,
                                   const Locale& locale,
                                   UErrorCode& status)
:   fPattern(pattern),
    fLocale(locale)
{

    fDateOverride.setTo(override);
    fTimeOverride.setToBogus();
    initializeBooleanAttributes();

    initializeCalendar(nullptr,fLocale,status);
    fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
    initialize(fLocale, status);
    initializeDefaultCentury();

    // Runs after initialize(), once the rest of the formatter is set up.
    processOverrideString(locale,override,kOvrStrBoth,status);

}
432
433 //----------------------------------------------------------------------
434
// Builds a formatter for an explicit pattern in the default locale, using
// caller-supplied symbols. `symbolsToAdopt` is stored directly in fSymbols,
// which the destructor deletes, so ownership passes to this object.
// NOTE(review): a null symbolsToAdopt is not rejected here -- confirm callers
// never pass one.
SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
                                   DateFormatSymbols* symbolsToAdopt,
                                   UErrorCode& status)
:   fPattern(pattern),
    fLocale(Locale::getDefault()),
    fSymbols(symbolsToAdopt)
{

    fDateOverride.setToBogus();
    fTimeOverride.setToBogus();
    initializeBooleanAttributes();

    initializeCalendar(nullptr,fLocale,status);
    initialize(fLocale, status);
    initializeDefaultCentury();
}
451
452 //----------------------------------------------------------------------
453
SimpleDateFormat(const UnicodeString & pattern,const DateFormatSymbols & symbols,UErrorCode & status)454 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
455 const DateFormatSymbols& symbols,
456 UErrorCode& status)
457 : fPattern(pattern),
458 fLocale(Locale::getDefault()),
459 fSymbols(new DateFormatSymbols(symbols))
460 {
461
462 fDateOverride.setToBogus();
463 fTimeOverride.setToBogus();
464 initializeBooleanAttributes();
465
466 initializeCalendar(nullptr, fLocale, status);
467 initialize(fLocale, status);
468 initializeDefaultCentury();
469 }
470
471 //----------------------------------------------------------------------
472
473 // Not for public consumption; used by DateFormat
SimpleDateFormat(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)474 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
475 EStyle dateStyle,
476 const Locale& locale,
477 UErrorCode& status)
478 : fLocale(locale)
479 {
480 initializeBooleanAttributes();
481 construct(timeStyle, dateStyle, fLocale, status);
482 if(U_SUCCESS(status)) {
483 initializeDefaultCentury();
484 }
485 }
486
487 //----------------------------------------------------------------------
488
489 /**
490 * Not for public consumption; used by DateFormat. This constructor
491 * never fails. If the resource data is not available, it uses the
492 * the last resort symbols.
493 */
SimpleDateFormat(const Locale & locale,UErrorCode & status)494 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
495 UErrorCode& status)
496 : fPattern(gDefaultPattern),
497 fLocale(locale)
498 {
499 if (U_FAILURE(status)) return;
500 initializeBooleanAttributes();
501 initializeCalendar(nullptr, fLocale, status);
502 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
503 if (U_FAILURE(status))
504 {
505 status = U_ZERO_ERROR;
506 delete fSymbols;
507 // This constructor doesn't fail; it uses last resort data
508 fSymbols = new DateFormatSymbols(status);
509 /* test for nullptr */
510 if (fSymbols == 0) {
511 status = U_MEMORY_ALLOCATION_ERROR;
512 return;
513 }
514 }
515
516 fDateOverride.setToBogus();
517 fTimeOverride.setToBogus();
518
519 initialize(fLocale, status);
520 if(U_SUCCESS(status)) {
521 initializeDefaultCentury();
522 }
523 }
524
525 //----------------------------------------------------------------------
526
// Copy constructor. Copies the DateFormat base and the locale directly, then
// delegates everything else to operator=.
SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
:   DateFormat(other),
    fLocale(other.fLocale)
{
    initializeBooleanAttributes();
    // operator= deletes the members it replaces, so it relies on them already
    // holding valid (presumably null) values here -- their initializers are
    // not visible in this file.
    *this = other;
}
534
535 //----------------------------------------------------------------------
536
// Copy assignment. Owned sub-objects (symbols, time zone format, break
// iterator) are deep-copied; shared number formats are shared by reference.
// Self-assignment is a no-op.
//
// There is no error channel: a failed allocation leaves the matching member
// null, and a failure while rebuilding the simple number formatter is
// ignored.
SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
{
    if (this == &other) {
        return *this;
    }

    // fSimpleNumberFormatter references fNumberFormatter, delete it
    // before we call the = operator which may invalidate fNumberFormatter
    delete fSimpleNumberFormatter;
    fSimpleNumberFormatter = nullptr;

    DateFormat::operator=(other);
    fDateOverride = other.fDateOverride;
    fTimeOverride = other.fTimeOverride;

    delete fSymbols;
    fSymbols = nullptr;

    if (other.fSymbols) {
        fSymbols = new DateFormatSymbols(*other.fSymbols);
    }

    fDefaultCenturyStart         = other.fDefaultCenturyStart;
    fDefaultCenturyStartYear     = other.fDefaultCenturyStartYear;
    fHaveDefaultCentury          = other.fHaveDefaultCentury;

    fPattern = other.fPattern;
    fHasMinute = other.fHasMinute;
    fHasSecond = other.fHasSecond;

    fLocale = other.fLocale;

    // TimeZoneFormat can now be set independently via setter.
    // If it is nullptr, it will be lazily initialized from locale.
    delete fTimeZoneFormat;
    fTimeZoneFormat = nullptr;
    TimeZoneFormat *otherTZFormat;
    {
        // Synchronization is required here, when accessing other.fTimeZoneFormat,
        // because another thread may be concurrently executing other.tzFormat(),
        // a logically const function that lazily creates other.fTimeZoneFormat.
        //
        // Without synchronization, reordered memory writes could allow us
        // to see a non-null fTimeZoneFormat before the object itself was
        // fully initialized. In case of a race, it doesn't matter whether
        // we see a null or a fully initialized other.fTimeZoneFormat,
        // only that we avoid seeing a partially initialized object.
        //
        // Once initialized, no const function can modify fTimeZoneFormat,
        // meaning that once we have safely grabbed the other.fTimeZoneFormat
        // pointer, continued synchronization is not required to use it.
        Mutex m(&LOCK);
        otherTZFormat = other.fTimeZoneFormat;
    }
    if (otherTZFormat) {
        fTimeZoneFormat = new TimeZoneFormat(*otherTZFormat);
    }

#if !UCONFIG_NO_BREAK_ITERATION
    // Release our own break iterator before taking a copy of the other one.
    // Without this, assigning over a formatter that already had an iterator
    // leaked it, and assigning from a formatter without one left a stale
    // iterator in place.
    delete fCapitalizationBrkIter;
    fCapitalizationBrkIter = nullptr;
    if (other.fCapitalizationBrkIter != nullptr) {
        fCapitalizationBrkIter = (other.fCapitalizationBrkIter)->clone();
    }
#endif

    if (fSharedNumberFormatters != nullptr) {
        freeSharedNumberFormatters(fSharedNumberFormatters);
        fSharedNumberFormatters = nullptr;
    }
    if (other.fSharedNumberFormatters != nullptr) {
        fSharedNumberFormatters = allocSharedNumberFormatters();
        if (fSharedNumberFormatters) {
            // Share each per-field formatter rather than cloning it.
            for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
                SharedObject::copyPtr(
                        other.fSharedNumberFormatters[i],
                        fSharedNumberFormatters[i]);
            }
        }
    }

    UErrorCode localStatus = U_ZERO_ERROR;
    // SimpleNumberFormatter does not have a copy constructor. Furthermore,
    // it references data from an internal field, fNumberFormatter,
    // so we must rematerialize that reference after copying over the number formatter.
    initSimpleNumberFormatter(localStatus);
    return *this;
}
622
623 //----------------------------------------------------------------------
624
// Returns a heap-allocated copy made with the copy constructor. The result of
// `new` is returned unchecked.
SimpleDateFormat*
SimpleDateFormat::clone() const
{
    return new SimpleDateFormat(*this);
}
630
631 //----------------------------------------------------------------------
632
633 bool
operator ==(const Format & other) const634 SimpleDateFormat::operator==(const Format& other) const
635 {
636 if (DateFormat::operator==(other)) {
637 // The DateFormat::operator== check for fCapitalizationContext equality above
638 // is sufficient to check equality of all derived context-related data.
639 // DateFormat::operator== guarantees following cast is safe
640 SimpleDateFormat* that = (SimpleDateFormat*)&other;
641 return (fPattern == that->fPattern &&
642 fSymbols != nullptr && // Check for pathological object
643 that->fSymbols != nullptr && // Check for pathological object
644 *fSymbols == *that->fSymbols &&
645 fHaveDefaultCentury == that->fHaveDefaultCentury &&
646 fDefaultCenturyStart == that->fDefaultCenturyStart);
647 }
648 return false;
649 }
650
651 //----------------------------------------------------------------------
// Skeletons passed to DateTimePatternGenerator::getBestPattern() by
// construct() when it synthesizes a time pattern. Indexed by time style;
// construct() only indexes it after checking kFull <= timeStyle <= kShort.
static const char16_t* timeSkeletons[4] = {
    u"jmmsszzzz",   // kFull
    u"jmmssz",      // kLong
    u"jmmss",       // kMedium
    u"jmm",         // kShort
};
658
construct(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)659 void SimpleDateFormat::construct(EStyle timeStyle,
660 EStyle dateStyle,
661 const Locale& locale,
662 UErrorCode& status)
663 {
664 // called by several constructors to load pattern data from the resources
665 if (U_FAILURE(status)) return;
666
667 // We will need the calendar to know what type of symbols to load.
668 initializeCalendar(nullptr, locale, status);
669 if (U_FAILURE(status)) return;
670
671 // Load date time patterns directly from resources.
672 const char* cType = fCalendar ? fCalendar->getType() : nullptr;
673 LocalUResourceBundlePointer bundle(ures_open(nullptr, locale.getBaseName(), &status));
674 if (U_FAILURE(status)) return;
675
676 UBool cTypeIsGregorian = true;
677 LocalUResourceBundlePointer dateTimePatterns;
678 if (cType != nullptr && uprv_strcmp(cType, "gregorian") != 0) {
679 CharString resourcePath("calendar/", status);
680 resourcePath.append(cType, status).append("/DateTimePatterns", status);
681 dateTimePatterns.adoptInstead(
682 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
683 (UResourceBundle*)nullptr, &status));
684 cTypeIsGregorian = false;
685 }
686
687 // Check for "gregorian" fallback.
688 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
689 status = U_ZERO_ERROR;
690 dateTimePatterns.adoptInstead(
691 ures_getByKeyWithFallback(bundle.getAlias(),
692 "calendar/gregorian/DateTimePatterns",
693 (UResourceBundle*)nullptr, &status));
694 }
695 if (U_FAILURE(status)) return;
696
697 LocalUResourceBundlePointer currentBundle;
698
699 if (ures_getSize(dateTimePatterns.getAlias()) <= kDateTime)
700 {
701 status = U_INVALID_FORMAT_ERROR;
702 return;
703 }
704
705 setLocaleIDs(ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_VALID_LOCALE, &status),
706 ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_ACTUAL_LOCALE, &status));
707
708 // create a symbols object from the locale
709 fSymbols = DateFormatSymbols::createForLocale(locale, status);
710 if (U_FAILURE(status)) return;
711 /* test for nullptr */
712 if (fSymbols == 0) {
713 status = U_MEMORY_ALLOCATION_ERROR;
714 return;
715 }
716
717 const char16_t *resStr,*ovrStr;
718 int32_t resStrLen,ovrStrLen = 0;
719 fDateOverride.setToBogus();
720 fTimeOverride.setToBogus();
721
722 UnicodeString timePattern;
723 if (timeStyle >= kFull && timeStyle <= kShort) {
724 const char* baseLocID = locale.getBaseName();
725 if (baseLocID[0]!=0 && uprv_strcmp(baseLocID,"und")!=0) {
726 UErrorCode useStatus = U_ZERO_ERROR;
727 Locale baseLoc(baseLocID);
728 Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus));
729 if (U_SUCCESS(useStatus) && validLoc!=baseLoc) {
730 bool useDTPG = false;
731 const char* baseReg = baseLoc.getCountry(); // empty string if no region
732 if ((baseReg[0]!=0 && uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0)
733 || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) {
734 // use DTPG if
735 // * baseLoc has a region and validLoc does not have the same one (or has none), OR
736 // * validLoc has a different language code than baseLoc
737 useDTPG = true;
738 }
739 if (useDTPG) {
740 // The standard time formats may have the wrong time cycle, because:
741 // the valid locale differs in important ways (region, language) from
742 // the base locale.
743 // We could *also* check whether they do actually have a mismatch with
744 // the time cycle preferences for the region, but that is a lot more
745 // work for little or no additional benefit, since just going ahead
746 // and always synthesizing the time format as per the following should
747 // create a locale-appropriate pattern with cycle that matches the
748 // region preferences anyway.
749 LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus));
750 if (U_SUCCESS(useStatus)) {
751 UnicodeString timeSkeleton(true, timeSkeletons[timeStyle], -1);
752 timePattern = dtpg->getBestPattern(timeSkeleton, useStatus);
753 }
754 }
755 }
756 }
757 }
758
759 // if the pattern should include both date and time information, use the date/time
760 // pattern string as a guide to tell use how to glue together the appropriate date
761 // and time pattern strings.
762 if ((timeStyle != kNone) && (dateStyle != kNone))
763 {
764 UnicodeString tempus1(timePattern);
765 if (tempus1.length() == 0) {
766 currentBundle.adoptInstead(
767 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
768 if (U_FAILURE(status)) {
769 status = U_INVALID_FORMAT_ERROR;
770 return;
771 }
772 switch (ures_getType(currentBundle.getAlias())) {
773 case URES_STRING: {
774 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
775 break;
776 }
777 case URES_ARRAY: {
778 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
779 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
780 fTimeOverride.setTo(true, ovrStr, ovrStrLen);
781 break;
782 }
783 default: {
784 status = U_INVALID_FORMAT_ERROR;
785 return;
786 }
787 }
788
789 tempus1.setTo(true, resStr, resStrLen);
790 }
791
792 currentBundle.adoptInstead(
793 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
794 if (U_FAILURE(status)) {
795 status = U_INVALID_FORMAT_ERROR;
796 return;
797 }
798 switch (ures_getType(currentBundle.getAlias())) {
799 case URES_STRING: {
800 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
801 break;
802 }
803 case URES_ARRAY: {
804 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
805 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
806 fDateOverride.setTo(true, ovrStr, ovrStrLen);
807 break;
808 }
809 default: {
810 status = U_INVALID_FORMAT_ERROR;
811 return;
812 }
813 }
814
815 UnicodeString tempus2(true, resStr, resStrLen);
816
817 // Currently, for compatibility with pre-CLDR-42 data, we default to the "atTime"
818 // combining patterns. Depending on guidance in CLDR 42 spec and on DisplayOptions,
819 // we may change this.
820 LocalUResourceBundlePointer dateAtTimePatterns;
821 if (!cTypeIsGregorian) {
822 CharString resourcePath("calendar/", status);
823 resourcePath.append(cType, status).append("/DateTimePatterns%atTime", status);
824 dateAtTimePatterns.adoptInstead(
825 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
826 nullptr, &status));
827 }
828 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
829 status = U_ZERO_ERROR;
830 dateAtTimePatterns.adoptInstead(
831 ures_getByKeyWithFallback(bundle.getAlias(),
832 "calendar/gregorian/DateTimePatterns%atTime",
833 nullptr, &status));
834 }
835 if (U_SUCCESS(status) && ures_getSize(dateAtTimePatterns.getAlias()) >= 4) {
836 resStr = ures_getStringByIndex(dateAtTimePatterns.getAlias(), dateStyle - kDateOffset, &resStrLen, &status);
837 } else {
838 status = U_ZERO_ERROR;
839 int32_t glueIndex = kDateTime;
840 int32_t patternsSize = ures_getSize(dateTimePatterns.getAlias());
841 if (patternsSize >= (kDateTimeOffset + kShort + 1)) {
842 // Get proper date time format
843 glueIndex = (int32_t)(kDateTimeOffset + (dateStyle - kDateOffset));
844 }
845
846 resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), glueIndex, &resStrLen, &status);
847 }
848 SimpleFormatter(UnicodeString(true, resStr, resStrLen), 2, 2, status).
849 format(tempus1, tempus2, fPattern, status);
850 }
851 // if the pattern includes just time data or just date date, load the appropriate
852 // pattern string from the resources
853 // setTo() - see DateFormatSymbols::assignArray comments
854 else if (timeStyle != kNone) {
855 fPattern.setTo(timePattern);
856 if (fPattern.length() == 0) {
857 currentBundle.adoptInstead(
858 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
859 if (U_FAILURE(status)) {
860 status = U_INVALID_FORMAT_ERROR;
861 return;
862 }
863 switch (ures_getType(currentBundle.getAlias())) {
864 case URES_STRING: {
865 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
866 break;
867 }
868 case URES_ARRAY: {
869 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
870 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
871 fDateOverride.setTo(true, ovrStr, ovrStrLen);
872 break;
873 }
874 default: {
875 status = U_INVALID_FORMAT_ERROR;
876 return;
877 }
878 }
879 fPattern.setTo(true, resStr, resStrLen);
880 }
881 }
882 else if (dateStyle != kNone) {
883 currentBundle.adoptInstead(
884 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
885 if (U_FAILURE(status)) {
886 status = U_INVALID_FORMAT_ERROR;
887 return;
888 }
889 switch (ures_getType(currentBundle.getAlias())) {
890 case URES_STRING: {
891 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
892 break;
893 }
894 case URES_ARRAY: {
895 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
896 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
897 fDateOverride.setTo(true, ovrStr, ovrStrLen);
898 break;
899 }
900 default: {
901 status = U_INVALID_FORMAT_ERROR;
902 return;
903 }
904 }
905 fPattern.setTo(true, resStr, resStrLen);
906 }
907
908 // and if it includes _neither_, that's an error
909 else
910 status = U_INVALID_FORMAT_ERROR;
911
912 // finally, finish initializing by creating a Calendar and a NumberFormat
913 initialize(locale, status);
914 }
915
916 //----------------------------------------------------------------------
917
918 Calendar*
initializeCalendar(TimeZone * adoptZone,const Locale & locale,UErrorCode & status)919 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
920 {
921 if(!U_FAILURE(status)) {
922 fCalendar = Calendar::createInstance(
923 adoptZone ? adoptZone : TimeZone::forLocaleOrDefault(locale), locale, status);
924 }
925 return fCalendar;
926 }
927
928 void
initialize(const Locale & locale,UErrorCode & status)929 SimpleDateFormat::initialize(const Locale& locale,
930 UErrorCode& status)
931 {
932 if (U_FAILURE(status)) return;
933
934 parsePattern(); // Need this before initNumberFormatters(), to set fHasHanYearChar
935
936 // Simple-minded hack to force Gannen year numbering for ja@calendar=japanese
937 // if format is non-numeric (includes 年) and fDateOverride is not already specified.
938 // Now this does get updated if applyPattern subsequently changes the pattern type.
939 if (fDateOverride.isBogus() && fHasHanYearChar &&
940 fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
941 uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
942 fDateOverride.setTo(u"y=jpanyear", -1);
943 }
944
945 // We don't need to check that the row count is >= 1, since all 2d arrays have at
946 // least one row
947 fNumberFormat = NumberFormat::createInstance(locale, status);
948 if (fNumberFormat != nullptr && U_SUCCESS(status))
949 {
950 fixNumberFormatForDates(*fNumberFormat);
951 //fNumberFormat->setLenient(true); // Java uses a custom DateNumberFormat to format/parse
952
953 initNumberFormatters(locale, status);
954 initSimpleNumberFormatter(status);
955
956 }
957 else if (U_SUCCESS(status))
958 {
959 status = U_MISSING_RESOURCE_ERROR;
960 }
961 }
962
963 /* Initialize the fields we use to disambiguate ambiguous years. Separate
964 * so we can call it from readObject().
965 */
initializeDefaultCentury()966 void SimpleDateFormat::initializeDefaultCentury()
967 {
968 if(fCalendar) {
969 fHaveDefaultCentury = fCalendar->haveDefaultCentury();
970 if(fHaveDefaultCentury) {
971 fDefaultCenturyStart = fCalendar->defaultCenturyStart();
972 fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
973 } else {
974 fDefaultCenturyStart = DBL_MIN;
975 fDefaultCenturyStartYear = -1;
976 }
977 }
978 }
979
980 /*
981 * Initialize the boolean attributes. Separate so we can call it from all constructors.
982 */
initializeBooleanAttributes()983 void SimpleDateFormat::initializeBooleanAttributes()
984 {
985 UErrorCode status = U_ZERO_ERROR;
986
987 setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status);
988 setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
989 setBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, true, status);
990 setBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, true, status);
991 }
992
993 /* Define one-century window into which to disambiguate dates using
994 * two-digit years. Make public in JDK 1.2.
995 */
parseAmbiguousDatesAsAfter(UDate startDate,UErrorCode & status)996 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
997 {
998 if(U_FAILURE(status)) {
999 return;
1000 }
1001 if(!fCalendar) {
1002 status = U_ILLEGAL_ARGUMENT_ERROR;
1003 return;
1004 }
1005
1006 fCalendar->setTime(startDate, status);
1007 if(U_SUCCESS(status)) {
1008 fHaveDefaultCentury = true;
1009 fDefaultCenturyStart = startDate;
1010 fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
1011 }
1012 }
1013
1014 //----------------------------------------------------------------------
1015
1016 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPosition & pos) const1017 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
1018 {
1019 UErrorCode status = U_ZERO_ERROR;
1020 FieldPositionOnlyHandler handler(pos);
1021 return _format(cal, appendTo, handler, status);
1022 }
1023
1024 //----------------------------------------------------------------------
1025
1026 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPositionIterator * posIter,UErrorCode & status) const1027 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo,
1028 FieldPositionIterator* posIter, UErrorCode& status) const
1029 {
1030 FieldPositionIteratorHandler handler(posIter, status);
1031 return _format(cal, appendTo, handler, status);
1032 }
1033
1034 //----------------------------------------------------------------------
1035
1036 UnicodeString&
_format(Calendar & cal,UnicodeString & appendTo,FieldPositionHandler & handler,UErrorCode & status) const1037 SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo,
1038 FieldPositionHandler& handler, UErrorCode& status) const
1039 {
1040 if ( U_FAILURE(status) ) {
1041 return appendTo;
1042 }
1043 Calendar* workCal = &cal;
1044 Calendar* calClone = nullptr;
1045 if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
1046 // Different calendar type
1047 // We use the time and time zone from the input calendar, but
1048 // do not use the input calendar for field calculation.
1049 calClone = fCalendar->clone();
1050 if (calClone != nullptr) {
1051 UDate t = cal.getTime(status);
1052 calClone->setTime(t, status);
1053 calClone->setTimeZone(cal.getTimeZone());
1054 workCal = calClone;
1055 } else {
1056 status = U_MEMORY_ALLOCATION_ERROR;
1057 return appendTo;
1058 }
1059 }
1060
1061 UBool inQuote = false;
1062 char16_t prevCh = 0;
1063 int32_t count = 0;
1064 int32_t fieldNum = 0;
1065 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1066
1067 // loop through the pattern string character by character
1068 for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
1069 char16_t ch = fPattern[i];
1070
1071 // Use subFormat() to format a repeated pattern character
1072 // when a different pattern or non-pattern character is seen
1073 if (ch != prevCh && count > 0) {
1074 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1075 prevCh, handler, *workCal, status);
1076 count = 0;
1077 }
1078 if (ch == QUOTE) {
1079 // Consecutive single quotes are a single quote literal,
1080 // either outside of quotes or between quotes
1081 if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
1082 appendTo += (char16_t)QUOTE;
1083 ++i;
1084 } else {
1085 inQuote = ! inQuote;
1086 }
1087 }
1088 else if (!inQuote && isSyntaxChar(ch)) {
1089 // ch is a date-time pattern character to be interpreted
1090 // by subFormat(); count the number of times it is repeated
1091 prevCh = ch;
1092 ++count;
1093 }
1094 else {
1095 // Append quoted characters and unquoted non-pattern characters
1096 appendTo += ch;
1097 }
1098 }
1099
1100 // Format the last item in the pattern, if any
1101 if (count > 0) {
1102 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1103 prevCh, handler, *workCal, status);
1104 }
1105
1106 if (calClone != nullptr) {
1107 delete calClone;
1108 }
1109
1110 return appendTo;
1111 }
1112
1113 //----------------------------------------------------------------------
1114
1115 /* Map calendar field into calendar field level.
1116 * the larger the level, the smaller the field unit.
1117 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
1118 * UCAL_MONTH level is 20.
1119 * NOTE: if new fields adds in, the table needs to update.
1120 */
1121 const int32_t
1122 SimpleDateFormat::fgCalendarFieldToLevel[] =
1123 {
1124 /*GyM*/ 0, 10, 20,
1125 /*wW*/ 20, 30,
1126 /*dDEF*/ 30, 20, 30, 30,
1127 /*ahHm*/ 40, 50, 50, 60,
1128 /*sS*/ 70, 80,
1129 /*z?Y*/ 0, 0, 10,
1130 /*eug*/ 30, 10, 0,
1131 /*A?.*/ 40, 0, 0
1132 };
1133
getLevelFromChar(char16_t ch)1134 int32_t SimpleDateFormat::getLevelFromChar(char16_t ch) {
1135 // Map date field LETTER into calendar field level.
1136 // the larger the level, the smaller the field unit.
1137 // NOTE: if new fields adds in, the table needs to update.
1138 static const int32_t mapCharToLevel[] = {
1139 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1140 //
1141 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1142 // ! " # $ % & ' ( ) * + , - . /
1143 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1144 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1145 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1146 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1,
1147 #else
1148 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1149 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1150 #endif
1151 // @ A B C D E F G H I J K L M N O
1152 -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, 0,
1153 // P Q R S T U V W X Y Z [ \ ] ^ _
1154 -1, 20, -1, 80, -1, 10, 0, 30, 0, 10, 0, -1, -1, -1, -1, -1,
1155 // ` a b c d e f g h i j k l m n o
1156 -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, 0, 60, -1, -1,
1157 // p q r s t u v w x y z { | } ~
1158 -1, 20, 10, 70, -1, 10, 0, 20, 0, 10, 0, -1, -1, -1, -1, -1
1159 };
1160
1161 return ch < UPRV_LENGTHOF(mapCharToLevel) ? mapCharToLevel[ch] : -1;
1162 }
1163
isSyntaxChar(char16_t ch)1164 UBool SimpleDateFormat::isSyntaxChar(char16_t ch) {
1165 static const UBool mapCharToIsSyntax[] = {
1166 //
1167 false, false, false, false, false, false, false, false,
1168 //
1169 false, false, false, false, false, false, false, false,
1170 //
1171 false, false, false, false, false, false, false, false,
1172 //
1173 false, false, false, false, false, false, false, false,
1174 // ! " # $ % & '
1175 false, false, false, false, false, false, false, false,
1176 // ( ) * + , - . /
1177 false, false, false, false, false, false, false, false,
1178 // 0 1 2 3 4 5 6 7
1179 false, false, false, false, false, false, false, false,
1180 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1181 // 8 9 : ; < = > ?
1182 false, false, true, false, false, false, false, false,
1183 #else
1184 // 8 9 : ; < = > ?
1185 false, false, false, false, false, false, false, false,
1186 #endif
1187 // @ A B C D E F G
1188 false, true, true, true, true, true, true, true,
1189 // H I J K L M N O
1190 true, true, true, true, true, true, true, true,
1191 // P Q R S T U V W
1192 true, true, true, true, true, true, true, true,
1193 // X Y Z [ \ ] ^ _
1194 true, true, true, false, false, false, false, false,
1195 // ` a b c d e f g
1196 false, true, true, true, true, true, true, true,
1197 // h i j k l m n o
1198 true, true, true, true, true, true, true, true,
1199 // p q r s t u v w
1200 true, true, true, true, true, true, true, true,
1201 // x y z { | } ~
1202 true, true, true, false, false, false, false, false
1203 };
1204
1205 return ch < UPRV_LENGTHOF(mapCharToIsSyntax) ? mapCharToIsSyntax[ch] : false;
1206 }
1207
1208 // Map index into pattern character string to Calendar field number.
1209 const UCalendarDateFields
1210 SimpleDateFormat::fgPatternIndexToCalendarField[] =
1211 {
1212 /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
1213 /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
1214 /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
1215 /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
1216 /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
1217 /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
1218 /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
1219 /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
1220 /*v*/ UCAL_ZONE_OFFSET,
1221 /*c*/ UCAL_DOW_LOCAL,
1222 /*L*/ UCAL_MONTH,
1223 /*Q*/ UCAL_MONTH,
1224 /*q*/ UCAL_MONTH,
1225 /*V*/ UCAL_ZONE_OFFSET,
1226 /*U*/ UCAL_YEAR,
1227 /*O*/ UCAL_ZONE_OFFSET,
1228 /*Xx*/ UCAL_ZONE_OFFSET, UCAL_ZONE_OFFSET,
1229 /*r*/ UCAL_EXTENDED_YEAR,
1230 /*bB*/ UCAL_FIELD_COUNT, UCAL_FIELD_COUNT, // no mappings to calendar fields
1231 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1232 /*:*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
1233 #else
1234 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
1235 #endif
1236 };
1237
1238 // Map index into pattern character string to DateFormat field number
1239 const UDateFormatField
1240 SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
1241 /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
1242 /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
1243 /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
1244 /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
1245 /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
1246 /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
1247 /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
1248 /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
1249 /*v*/ UDAT_TIMEZONE_GENERIC_FIELD,
1250 /*c*/ UDAT_STANDALONE_DAY_FIELD,
1251 /*L*/ UDAT_STANDALONE_MONTH_FIELD,
1252 /*Q*/ UDAT_QUARTER_FIELD,
1253 /*q*/ UDAT_STANDALONE_QUARTER_FIELD,
1254 /*V*/ UDAT_TIMEZONE_SPECIAL_FIELD,
1255 /*U*/ UDAT_YEAR_NAME_FIELD,
1256 /*O*/ UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD,
1257 /*Xx*/ UDAT_TIMEZONE_ISO_FIELD, UDAT_TIMEZONE_ISO_LOCAL_FIELD,
1258 /*r*/ UDAT_RELATED_YEAR_FIELD,
1259 /*bB*/ UDAT_AM_PM_MIDNIGHT_NOON_FIELD, UDAT_FLEXIBLE_DAY_PERIOD_FIELD,
1260 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1261 /*:*/ UDAT_TIME_SEPARATOR_FIELD,
1262 #else
1263 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UDAT_TIME_SEPARATOR_FIELD,
1264 #endif
1265 };
1266
1267 //----------------------------------------------------------------------
1268
1269 /**
1270 * Append symbols[value] to dst. Make sure the array index is not out
1271 * of bounds.
1272 */
1273 static inline void
_appendSymbol(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount)1274 _appendSymbol(UnicodeString& dst,
1275 int32_t value,
1276 const UnicodeString* symbols,
1277 int32_t symbolsCount) {
1278 U_ASSERT(0 <= value && value < symbolsCount);
1279 if (0 <= value && value < symbolsCount) {
1280 dst += symbols[value];
1281 }
1282 }
1283
1284 static inline void
_appendSymbolWithMonthPattern(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount,const UnicodeString * monthPattern,UErrorCode & status)1285 _appendSymbolWithMonthPattern(UnicodeString& dst, int32_t value, const UnicodeString* symbols, int32_t symbolsCount,
1286 const UnicodeString* monthPattern, UErrorCode& status) {
1287 U_ASSERT(0 <= value && value < symbolsCount);
1288 if (0 <= value && value < symbolsCount) {
1289 if (monthPattern == nullptr) {
1290 dst += symbols[value];
1291 } else {
1292 SimpleFormatter(*monthPattern, 1, 1, status).format(symbols[value], dst, status);
1293 }
1294 }
1295 }
1296
1297 //----------------------------------------------------------------------
1298
1299 void
initSimpleNumberFormatter(UErrorCode & status)1300 SimpleDateFormat::initSimpleNumberFormatter(UErrorCode &status) {
1301 if (U_FAILURE(status)) {
1302 return;
1303 }
1304 auto* df = dynamic_cast<const DecimalFormat*>(fNumberFormat);
1305 if (df == nullptr) {
1306 return;
1307 }
1308 const DecimalFormatSymbols* syms = df->getDecimalFormatSymbols();
1309 if (syms == nullptr) {
1310 return;
1311 }
1312 fSimpleNumberFormatter = new number::SimpleNumberFormatter(
1313 number::SimpleNumberFormatter::forLocaleAndSymbolsAndGroupingStrategy(
1314 fLocale, *syms, UNUM_GROUPING_OFF, status
1315 )
1316 );
1317 if (fSimpleNumberFormatter == nullptr) {
1318 status = U_MEMORY_ALLOCATION_ERROR;
1319 }
1320 }
1321
1322 void
initNumberFormatters(const Locale & locale,UErrorCode & status)1323 SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) {
1324 if (U_FAILURE(status)) {
1325 return;
1326 }
1327 if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) {
1328 return;
1329 }
1330 umtx_lock(&LOCK);
1331 if (fSharedNumberFormatters == nullptr) {
1332 fSharedNumberFormatters = allocSharedNumberFormatters();
1333 if (fSharedNumberFormatters == nullptr) {
1334 status = U_MEMORY_ALLOCATION_ERROR;
1335 }
1336 }
1337 umtx_unlock(&LOCK);
1338
1339 if (U_FAILURE(status)) {
1340 return;
1341 }
1342
1343 processOverrideString(locale,fDateOverride,kOvrStrDate,status);
1344 processOverrideString(locale,fTimeOverride,kOvrStrTime,status);
1345 }
1346
1347 void
processOverrideString(const Locale & locale,const UnicodeString & str,int8_t type,UErrorCode & status)1348 SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) {
1349 if (str.isBogus() || U_FAILURE(status)) {
1350 return;
1351 }
1352
1353 int32_t start = 0;
1354 int32_t len;
1355 UnicodeString nsName;
1356 UnicodeString ovrField;
1357 UBool moreToProcess = true;
1358 NSOverride *overrideList = nullptr;
1359
1360 while (moreToProcess) {
1361 int32_t delimiterPosition = str.indexOf((char16_t)ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE,start);
1362 if (delimiterPosition == -1) {
1363 moreToProcess = false;
1364 len = str.length() - start;
1365 } else {
1366 len = delimiterPosition - start;
1367 }
1368 UnicodeString currentString(str,start,len);
1369 int32_t equalSignPosition = currentString.indexOf((char16_t)ULOC_KEYWORD_ASSIGN_UNICODE,0);
1370 if (equalSignPosition == -1) { // Simple override string such as "hebrew"
1371 nsName.setTo(currentString);
1372 ovrField.setToBogus();
1373 } else { // Field specific override string such as "y=hebrew"
1374 nsName.setTo(currentString,equalSignPosition+1);
1375 ovrField.setTo(currentString,0,1); // We just need the first character.
1376 }
1377
1378 int32_t nsNameHash = nsName.hashCode();
1379 // See if the numbering system is in the override list, if not, then add it.
1380 NSOverride *curr = overrideList;
1381 const SharedNumberFormat *snf = nullptr;
1382 UBool found = false;
1383 while ( curr && !found ) {
1384 if ( curr->hash == nsNameHash ) {
1385 snf = curr->snf;
1386 found = true;
1387 }
1388 curr = curr->next;
1389 }
1390
1391 if (!found) {
1392 LocalPointer<NSOverride> cur(new NSOverride);
1393 if (!cur.isNull()) {
1394 char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1395 uprv_strcpy(kw,"numbers=");
1396 nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV);
1397
1398 Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw);
1399 cur->hash = nsNameHash;
1400 cur->next = overrideList;
1401 SharedObject::copyPtr(
1402 createSharedNumberFormat(ovrLoc, status), cur->snf);
1403 if (U_FAILURE(status)) {
1404 if (overrideList) {
1405 overrideList->free();
1406 }
1407 return;
1408 }
1409 snf = cur->snf;
1410 overrideList = cur.orphan();
1411 } else {
1412 status = U_MEMORY_ALLOCATION_ERROR;
1413 if (overrideList) {
1414 overrideList->free();
1415 }
1416 return;
1417 }
1418 }
1419
1420 // Now that we have an appropriate number formatter, fill in the appropriate spaces in the
1421 // number formatters table.
1422 if (ovrField.isBogus()) {
1423 switch (type) {
1424 case kOvrStrDate:
1425 case kOvrStrBoth: {
1426 for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) {
1427 SharedObject::copyPtr(snf, fSharedNumberFormatters[kDateFields[i]]);
1428 }
1429 if (type==kOvrStrDate) {
1430 break;
1431 }
1432 U_FALLTHROUGH;
1433 }
1434 case kOvrStrTime : {
1435 for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) {
1436 SharedObject::copyPtr(snf, fSharedNumberFormatters[kTimeFields[i]]);
1437 }
1438 break;
1439 }
1440 }
1441 } else {
1442 // if the pattern character is unrecognized, signal an error and bail out
1443 UDateFormatField patternCharIndex =
1444 DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0));
1445 if (patternCharIndex == UDAT_FIELD_COUNT) {
1446 status = U_INVALID_FORMAT_ERROR;
1447 if (overrideList) {
1448 overrideList->free();
1449 }
1450 return;
1451 }
1452 SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
1453 }
1454
1455 start = delimiterPosition + 1;
1456 }
1457 if (overrideList) {
1458 overrideList->free();
1459 }
1460 }
1461
1462 //---------------------------------------------------------------------
1463 void
subFormat(UnicodeString & appendTo,char16_t ch,int32_t count,UDisplayContext capitalizationContext,int32_t fieldNum,char16_t fieldToOutput,FieldPositionHandler & handler,Calendar & cal,UErrorCode & status) const1464 SimpleDateFormat::subFormat(UnicodeString &appendTo,
1465 char16_t ch,
1466 int32_t count,
1467 UDisplayContext capitalizationContext,
1468 int32_t fieldNum,
1469 char16_t fieldToOutput,
1470 FieldPositionHandler& handler,
1471 Calendar& cal,
1472 UErrorCode& status) const
1473 {
1474 if (U_FAILURE(status)) {
1475 return;
1476 }
1477
1478 // this function gets called by format() to produce the appropriate substitution
1479 // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1480
1481 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
1482 const int32_t maxIntCount = 10;
1483 int32_t beginOffset = appendTo.length();
1484 const NumberFormat *currentNumberFormat;
1485 DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther;
1486
1487 UBool isHebrewCalendar = (uprv_strcmp(cal.getType(),"hebrew") == 0);
1488 UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0);
1489
1490 // if the pattern character is unrecognized, signal an error and dump out
1491 if (patternCharIndex == UDAT_FIELD_COUNT)
1492 {
1493 if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
1494 status = U_INVALID_FORMAT_ERROR;
1495 }
1496 return;
1497 }
1498
1499 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1500 int32_t value = 0;
1501 // Don't get value unless it is useful
1502 if (field < UCAL_FIELD_COUNT) {
1503 value = (patternCharIndex != UDAT_RELATED_YEAR_FIELD)? cal.get(field, status): cal.getRelatedYear(status);
1504 }
1505 if (U_FAILURE(status)) {
1506 return;
1507 }
1508
1509 currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
1510 if (currentNumberFormat == nullptr) {
1511 status = U_INTERNAL_PROGRAM_ERROR;
1512 return;
1513 }
1514 UnicodeString hebr("hebr", 4, US_INV);
1515
1516 switch (patternCharIndex) {
1517
1518 // for any "G" symbol, write out the appropriate era string
1519 // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
1520 case UDAT_ERA_FIELD:
1521 if (isChineseCalendar) {
1522 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J
1523 } else {
1524 if (count == 5) {
1525 _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1526 capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow;
1527 } else if (count == 4) {
1528 _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1529 capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide;
1530 } else {
1531 _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1532 capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev;
1533 }
1534 }
1535 break;
1536
1537 case UDAT_YEAR_NAME_FIELD:
1538 if (fSymbols->fShortYearNames != nullptr && value <= fSymbols->fShortYearNamesCount) {
1539 // the Calendar YEAR field runs 1 through 60 for cyclic years
1540 _appendSymbol(appendTo, value - 1, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount);
1541 break;
1542 }
1543 // else fall through to numeric year handling, do not break here
1544 U_FALLTHROUGH;
1545
1546 // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1547 // NEW: UTS#35:
1548 //Year y yy yyy yyyy yyyyy
1549 //AD 1 1 01 001 0001 00001
1550 //AD 12 12 12 012 0012 00012
1551 //AD 123 123 23 123 0123 00123
1552 //AD 1234 1234 34 1234 1234 01234
1553 //AD 12345 12345 45 12345 12345 12345
1554 case UDAT_YEAR_FIELD:
1555 case UDAT_YEAR_WOY_FIELD:
1556 if (fDateOverride.compare(hebr)==0 && value>HEBREW_CAL_CUR_MILLENIUM_START_YEAR && value<HEBREW_CAL_CUR_MILLENIUM_END_YEAR) {
1557 value-=HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
1558 }
1559 if(count == 2)
1560 zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2);
1561 else
1562 zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount);
1563 break;
1564
1565 // for "MMMM"/"LLLL", write out the whole month name, for "MMM"/"LLL", write out the month
1566 // abbreviation, for "M"/"L" or "MM"/"LL", write out the month as a number with the
1567 // appropriate number of digits
1568 // for "MMMMM"/"LLLLL", use the narrow form
1569 case UDAT_MONTH_FIELD:
1570 case UDAT_STANDALONE_MONTH_FIELD:
1571 if ( isHebrewCalendar ) {
1572 HebrewCalendar *hc = (HebrewCalendar*)&cal;
1573 if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 )
1574 value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar.
1575 if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6 && count < 3 )
1576 value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7.
1577 }
1578 {
1579 int32_t isLeapMonth = (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount)?
1580 cal.get(UCAL_IS_LEAP_MONTH, status): 0;
1581 // should consolidate the next section by using arrays of pointers & counts for the right symbols...
1582 if (count == 5) {
1583 if (patternCharIndex == UDAT_MONTH_FIELD) {
1584 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fNarrowMonths, fSymbols->fNarrowMonthsCount,
1585 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatNarrow]): nullptr, status);
1586 } else {
1587 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneNarrowMonths, fSymbols->fStandaloneNarrowMonthsCount,
1588 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneNarrow]): nullptr, status);
1589 }
1590 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthNarrow;
1591 } else if (count == 4) {
1592 if (patternCharIndex == UDAT_MONTH_FIELD) {
1593 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fMonths, fSymbols->fMonthsCount,
1594 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]): nullptr, status);
1595 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1596 } else {
1597 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount,
1598 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]): nullptr, status);
1599 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1600 }
1601 } else if (count == 3) {
1602 if (patternCharIndex == UDAT_MONTH_FIELD) {
1603 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fShortMonths, fSymbols->fShortMonthsCount,
1604 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]): nullptr, status);
1605 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1606 } else {
1607 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount,
1608 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]): nullptr, status);
1609 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1610 }
1611 } else {
1612 UnicodeString monthNumber;
1613 zeroPaddingNumber(currentNumberFormat,monthNumber, value + 1, count, maxIntCount);
1614 _appendSymbolWithMonthPattern(appendTo, 0, &monthNumber, 1,
1615 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric]): nullptr, status);
1616 }
1617 }
1618 break;
1619
1620 // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1621 case UDAT_HOUR_OF_DAY1_FIELD:
1622 if (value == 0)
1623 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1624 else
1625 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1626 break;
1627
1628 case UDAT_FRACTIONAL_SECOND_FIELD:
1629 // Fractional seconds left-justify
1630 {
1631 int32_t minDigits = (count > 3) ? 3 : count;
1632 if (count == 1) {
1633 value /= 100;
1634 } else if (count == 2) {
1635 value /= 10;
1636 }
1637 zeroPaddingNumber(currentNumberFormat, appendTo, value, minDigits, maxIntCount);
1638 if (count > 3) {
1639 zeroPaddingNumber(currentNumberFormat, appendTo, 0, count - 3, maxIntCount);
1640 }
1641 }
1642 break;
1643
1644 // for "ee" or "e", use local numeric day-of-the-week
1645 // for "EEEEEE" or "eeeeee", write out the short day-of-the-week name
1646 // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1647 // for "EEEE" or "eeee", write out the wide day-of-the-week name
1648 // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1649 case UDAT_DOW_LOCAL_FIELD:
1650 if ( count < 3 ) {
1651 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1652 break;
1653 }
1654 // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1655 // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1656 value = cal.get(UCAL_DAY_OF_WEEK, status);
1657 if (U_FAILURE(status)) {
1658 return;
1659 }
1660 // fall through, do not break here
1661 U_FALLTHROUGH;
1662 case UDAT_DAY_OF_WEEK_FIELD:
1663 if (count == 5) {
1664 _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1665 fSymbols->fNarrowWeekdaysCount);
1666 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1667 } else if (count == 4) {
1668 _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1669 fSymbols->fWeekdaysCount);
1670 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1671 } else if (count == 6) {
1672 _appendSymbol(appendTo, value, fSymbols->fShorterWeekdays,
1673 fSymbols->fShorterWeekdaysCount);
1674 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1675 } else {
1676 _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1677 fSymbols->fShortWeekdaysCount);
1678 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1679 }
1680 break;
1681
1682 // for "ccc", write out the abbreviated day-of-the-week name
1683 // for "cccc", write out the wide day-of-the-week name
1684 // for "ccccc", use the narrow day-of-the-week name
// for "cccccc", use the short day-of-the-week name
1686 case UDAT_STANDALONE_DAY_FIELD:
1687 if ( count < 3 ) {
1688 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount);
1689 break;
1690 }
1691 // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1692 // we want standard day-of-week, so first fix value.
1693 value = cal.get(UCAL_DAY_OF_WEEK, status);
1694 if (U_FAILURE(status)) {
1695 return;
1696 }
1697 if (count == 5) {
1698 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1699 fSymbols->fStandaloneNarrowWeekdaysCount);
1700 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1701 } else if (count == 4) {
1702 _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1703 fSymbols->fStandaloneWeekdaysCount);
1704 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1705 } else if (count == 6) {
1706 _appendSymbol(appendTo, value, fSymbols->fStandaloneShorterWeekdays,
1707 fSymbols->fStandaloneShorterWeekdaysCount);
1708 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1709 } else { // count == 3
1710 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1711 fSymbols->fStandaloneShortWeekdaysCount);
1712 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1713 }
1714 break;
1715
1716 // for "a" symbol, write out the whole AM/PM string
1717 case UDAT_AM_PM_FIELD:
1718 if (count < 5) {
1719 _appendSymbol(appendTo, value, fSymbols->fAmPms,
1720 fSymbols->fAmPmsCount);
1721 } else {
1722 _appendSymbol(appendTo, value, fSymbols->fNarrowAmPms,
1723 fSymbols->fNarrowAmPmsCount);
1724 }
1725 break;
1726
1727 // if we see pattern character for UDAT_TIME_SEPARATOR_FIELD (none currently defined),
1728 // write out the time separator string. Leave support in for future definition.
1729 case UDAT_TIME_SEPARATOR_FIELD:
1730 {
1731 UnicodeString separator;
1732 appendTo += fSymbols->getTimeSeparatorString(separator);
1733 }
1734 break;
1735
1736 // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1737 // as "12"
1738 case UDAT_HOUR1_FIELD:
1739 if (value == 0)
1740 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1741 else
1742 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1743 break;
1744
1745 case UDAT_TIMEZONE_FIELD: // 'z'
1746 case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
1747 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
1748 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
1749 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
1750 case UDAT_TIMEZONE_ISO_FIELD: // 'X'
1751 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
1752 {
1753 char16_t zsbuf[ZONE_NAME_U16_MAX];
1754 UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf));
1755 const TimeZone& tz = cal.getTimeZone();
1756 UDate date = cal.getTime(status);
1757 const TimeZoneFormat *tzfmt = tzFormat(status);
1758 if (U_SUCCESS(status)) {
1759 if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1760 if (count < 4) {
1761 // "z", "zz", "zzz"
1762 tzfmt->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString);
1763 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1764 } else {
1765 // "zzzz" or longer
1766 tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString);
1767 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1768 }
1769 }
1770 else if (patternCharIndex == UDAT_TIMEZONE_RFC_FIELD) {
1771 if (count < 4) {
1772 // "Z"
1773 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1774 } else if (count == 5) {
1775 // "ZZZZZ"
1776 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1777 } else {
1778 // "ZZ", "ZZZ", "ZZZZ"
1779 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1780 }
1781 }
1782 else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1783 if (count == 1) {
1784 // "v"
1785 tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString);
1786 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1787 } else if (count == 4) {
1788 // "vvvv"
1789 tzfmt->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString);
1790 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1791 }
1792 }
1793 else if (patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD) {
1794 if (count == 1) {
1795 // "V"
1796 tzfmt->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString);
1797 } else if (count == 2) {
1798 // "VV"
1799 tzfmt->format(UTZFMT_STYLE_ZONE_ID, tz, date, zoneString);
1800 } else if (count == 3) {
1801 // "VVV"
1802 tzfmt->format(UTZFMT_STYLE_EXEMPLAR_LOCATION, tz, date, zoneString);
1803 } else if (count == 4) {
1804 // "VVVV"
1805 tzfmt->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString);
1806 capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong;
1807 }
1808 }
1809 else if (patternCharIndex == UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD) {
1810 if (count == 1) {
1811 // "O"
1812 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString);
1813 } else if (count == 4) {
1814 // "OOOO"
1815 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1816 }
1817 }
1818 else if (patternCharIndex == UDAT_TIMEZONE_ISO_FIELD) {
1819 if (count == 1) {
1820 // "X"
1821 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString);
1822 } else if (count == 2) {
1823 // "XX"
1824 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FIXED, tz, date, zoneString);
1825 } else if (count == 3) {
1826 // "XXX"
1827 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FIXED, tz, date, zoneString);
1828 } else if (count == 4) {
1829 // "XXXX"
1830 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FULL, tz, date, zoneString);
1831 } else if (count == 5) {
1832 // "XXXXX"
1833 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1834 }
1835 }
1836 else if (patternCharIndex == UDAT_TIMEZONE_ISO_LOCAL_FIELD) {
1837 if (count == 1) {
1838 // "x"
1839 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString);
1840 } else if (count == 2) {
1841 // "xx"
1842 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, tz, date, zoneString);
1843 } else if (count == 3) {
1844 // "xxx"
1845 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, tz, date, zoneString);
1846 } else if (count == 4) {
1847 // "xxxx"
1848 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1849 } else if (count == 5) {
1850 // "xxxxx"
1851 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString);
1852 }
1853 }
1854 else {
1855 UPRV_UNREACHABLE_EXIT;
1856 }
1857 }
1858 appendTo += zoneString;
1859 }
1860 break;
1861
1862 case UDAT_QUARTER_FIELD:
1863 if (count >= 5)
1864 _appendSymbol(appendTo, value/3, fSymbols->fNarrowQuarters,
1865 fSymbols->fNarrowQuartersCount);
1866 else if (count == 4)
1867 _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1868 fSymbols->fQuartersCount);
1869 else if (count == 3)
1870 _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1871 fSymbols->fShortQuartersCount);
1872 else
1873 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1874 break;
1875
1876 case UDAT_STANDALONE_QUARTER_FIELD:
1877 if (count >= 5)
1878 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneNarrowQuarters,
1879 fSymbols->fStandaloneNarrowQuartersCount);
1880 else if (count == 4)
1881 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1882 fSymbols->fStandaloneQuartersCount);
1883 else if (count == 3)
1884 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1885 fSymbols->fStandaloneShortQuartersCount);
1886 else
1887 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1888 break;
1889
1890 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
1891 {
1892 const UnicodeString *toAppend = nullptr;
1893 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1894
1895 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1896 // For ICU 57 output of "midnight" is temporarily suppressed.
1897
1898 // For "midnight" and "noon":
1899 // Time, as displayed, must be exactly noon or midnight.
1900 // This means minutes and seconds, if present, must be zero.
1901 if ((/*hour == 0 ||*/ hour == 12) &&
1902 (!fHasMinute || cal.get(UCAL_MINUTE, status) == 0) &&
1903 (!fHasSecond || cal.get(UCAL_SECOND, status) == 0)) {
1904 // Stealing am/pm value to use as our array index.
1905 // It works out: am/midnight are both 0, pm/noon are both 1,
1906 // 12 am is 12 midnight, and 12 pm is 12 noon.
1907 int32_t val = cal.get(UCAL_AM_PM, status);
1908
1909 if (count <= 3) {
1910 toAppend = &fSymbols->fAbbreviatedDayPeriods[val];
1911 } else if (count == 4 || count > 5) {
1912 toAppend = &fSymbols->fWideDayPeriods[val];
1913 } else { // count == 5
1914 toAppend = &fSymbols->fNarrowDayPeriods[val];
1915 }
1916 }
1917
1918 // toAppend is nullptr if time isn't exactly midnight or noon (as displayed).
1919 // toAppend is bogus if time is midnight or noon, but no localized string exists.
1920 // In either case, fall back to am/pm.
1921 if (toAppend == nullptr || toAppend->isBogus()) {
1922 // Reformat with identical arguments except ch, now changed to 'a'.
1923 // We are passing a different fieldToOutput because we want to add
1924 // 'b' to field position. This makes this fallback stable when
1925 // there is a data change on locales.
1926 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'b', handler, cal, status);
1927 return;
1928 } else {
1929 appendTo += *toAppend;
1930 }
1931
1932 break;
1933 }
1934
1935 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
1936 {
1937 // TODO: Maybe fetch the DayperiodRules during initialization (instead of at the first
1938 // loading of an instance) if a relevant pattern character (b or B) is used.
1939 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
1940 if (U_FAILURE(status)) {
1941 // Data doesn't conform to spec, therefore loading failed.
1942 break;
1943 }
1944 if (ruleSet == nullptr) {
1945 // Data doesn't exist for the locale we're looking for.
1946 // Falling back to am/pm.
1947 // We are passing a different fieldToOutput because we want to add
1948 // 'B' to field position. This makes this fallback stable when
1949 // there is a data change on locales.
1950 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
1951 return;
1952 }
1953
1954 // Get current display time.
1955 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1956 int32_t minute = 0;
1957 if (fHasMinute) {
1958 minute = cal.get(UCAL_MINUTE, status);
1959 }
1960 int32_t second = 0;
1961 if (fHasSecond) {
1962 second = cal.get(UCAL_SECOND, status);
1963 }
1964
1965 // Determine day period.
1966 DayPeriodRules::DayPeriod periodType;
1967 if (hour == 0 && minute == 0 && second == 0 && ruleSet->hasMidnight()) {
1968 periodType = DayPeriodRules::DAYPERIOD_MIDNIGHT;
1969 } else if (hour == 12 && minute == 0 && second == 0 && ruleSet->hasNoon()) {
1970 periodType = DayPeriodRules::DAYPERIOD_NOON;
1971 } else {
1972 periodType = ruleSet->getDayPeriodForHour(hour);
1973 }
1974
1975 // Rule set exists, therefore periodType can't be UNKNOWN.
1976 // Get localized string.
1977 U_ASSERT(periodType != DayPeriodRules::DAYPERIOD_UNKNOWN);
1978 UnicodeString *toAppend = nullptr;
1979 int32_t index;
1980
1981 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1982 // For ICU 57 output of "midnight" is temporarily suppressed.
1983
1984 if (periodType != DayPeriodRules::DAYPERIOD_AM &&
1985 periodType != DayPeriodRules::DAYPERIOD_PM &&
1986 periodType != DayPeriodRules::DAYPERIOD_MIDNIGHT) {
1987 index = (int32_t)periodType;
1988 if (count <= 3) {
1989 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short
1990 } else if (count == 4 || count > 5) {
1991 toAppend = &fSymbols->fWideDayPeriods[index];
1992 } else { // count == 5
1993 toAppend = &fSymbols->fNarrowDayPeriods[index];
1994 }
1995 }
1996
1997 // Fallback schedule:
1998 // Midnight/Noon -> General Periods -> AM/PM.
1999
2000 // Midnight/Noon -> General Periods.
2001 if ((toAppend == nullptr || toAppend->isBogus()) &&
2002 (periodType == DayPeriodRules::DAYPERIOD_MIDNIGHT ||
2003 periodType == DayPeriodRules::DAYPERIOD_NOON)) {
2004 periodType = ruleSet->getDayPeriodForHour(hour);
2005 index = (int32_t)periodType;
2006
2007 if (count <= 3) {
2008 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short
2009 } else if (count == 4 || count > 5) {
2010 toAppend = &fSymbols->fWideDayPeriods[index];
2011 } else { // count == 5
2012 toAppend = &fSymbols->fNarrowDayPeriods[index];
2013 }
2014 }
2015
2016 // General Periods -> AM/PM.
2017 if (periodType == DayPeriodRules::DAYPERIOD_AM ||
2018 periodType == DayPeriodRules::DAYPERIOD_PM ||
2019 toAppend->isBogus()) {
2020 // We are passing a different fieldToOutput because we want to add
2021 // 'B' to field position iterator. This makes this fallback stable when
2022 // there is a data change on locales.
2023 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
2024 return;
2025 }
2026 else {
2027 appendTo += *toAppend;
2028 }
2029
2030 break;
2031 }
2032
2033 // all of the other pattern symbols can be formatted as simple numbers with
2034 // appropriate zero padding
2035 default:
2036 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
2037 break;
2038 }
2039 #if !UCONFIG_NO_BREAK_ITERATION
2040 // if first field, check to see whether we need to and are able to titlecase it
2041 if (fieldNum == 0 && fCapitalizationBrkIter != nullptr && appendTo.length() > beginOffset &&
2042 u_islower(appendTo.char32At(beginOffset))) {
2043 UBool titlecase = false;
2044 switch (capitalizationContext) {
2045 case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE:
2046 titlecase = true;
2047 break;
2048 case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU:
2049 titlecase = fSymbols->fCapitalization[capContextUsageType][0];
2050 break;
2051 case UDISPCTX_CAPITALIZATION_FOR_STANDALONE:
2052 titlecase = fSymbols->fCapitalization[capContextUsageType][1];
2053 break;
2054 default:
2055 // titlecase = false;
2056 break;
2057 }
2058 if (titlecase) {
2059 BreakIterator* const mutableCapitalizationBrkIter = fCapitalizationBrkIter->clone();
2060 UnicodeString firstField(appendTo, beginOffset);
2061 firstField.toTitle(mutableCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
2062 appendTo.replaceBetween(beginOffset, appendTo.length(), firstField);
2063 delete mutableCapitalizationBrkIter;
2064 }
2065 }
2066 #endif
2067
2068 handler.addAttribute(DateFormatSymbols::getPatternCharIndex(fieldToOutput), beginOffset, appendTo.length());
2069 }
2070
2071 //----------------------------------------------------------------------
2072
adoptNumberFormat(NumberFormat * formatToAdopt)2073 void SimpleDateFormat::adoptNumberFormat(NumberFormat *formatToAdopt) {
2074 // Null out the fast formatter, it references fNumberFormat which we're
2075 // about to invalidate
2076 delete fSimpleNumberFormatter;
2077 fSimpleNumberFormatter = nullptr;
2078
2079 fixNumberFormatForDates(*formatToAdopt);
2080 delete fNumberFormat;
2081 fNumberFormat = formatToAdopt;
2082
2083 // We successfully set the default number format. Now delete the overrides
2084 // (can't fail).
2085 if (fSharedNumberFormatters) {
2086 freeSharedNumberFormatters(fSharedNumberFormatters);
2087 fSharedNumberFormatters = nullptr;
2088 }
2089
2090 // Recompute fSimpleNumberFormatter if necessary
2091 UErrorCode localStatus = U_ZERO_ERROR;
2092 initSimpleNumberFormatter(localStatus);
2093 }
2094
adoptNumberFormat(const UnicodeString & fields,NumberFormat * formatToAdopt,UErrorCode & status)2095 void SimpleDateFormat::adoptNumberFormat(const UnicodeString& fields, NumberFormat *formatToAdopt, UErrorCode &status){
2096 fixNumberFormatForDates(*formatToAdopt);
2097 LocalPointer<NumberFormat> fmt(formatToAdopt);
2098 if (U_FAILURE(status)) {
2099 return;
2100 }
2101
2102 // We must ensure fSharedNumberFormatters is allocated.
2103 if (fSharedNumberFormatters == nullptr) {
2104 fSharedNumberFormatters = allocSharedNumberFormatters();
2105 if (fSharedNumberFormatters == nullptr) {
2106 status = U_MEMORY_ALLOCATION_ERROR;
2107 return;
2108 }
2109 }
2110 const SharedNumberFormat *newFormat = createSharedNumberFormat(fmt.orphan());
2111 if (newFormat == nullptr) {
2112 status = U_MEMORY_ALLOCATION_ERROR;
2113 return;
2114 }
2115 for (int i=0; i<fields.length(); i++) {
2116 char16_t field = fields.charAt(i);
2117 // if the pattern character is unrecognized, signal an error and bail out
2118 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(field);
2119 if (patternCharIndex == UDAT_FIELD_COUNT) {
2120 status = U_INVALID_FORMAT_ERROR;
2121 newFormat->deleteIfZeroRefCount();
2122 return;
2123 }
2124
2125 // Set the number formatter in the table
2126 SharedObject::copyPtr(
2127 newFormat, fSharedNumberFormatters[patternCharIndex]);
2128 }
2129 newFormat->deleteIfZeroRefCount();
2130 }
2131
2132 const NumberFormat *
getNumberFormatForField(char16_t field) const2133 SimpleDateFormat::getNumberFormatForField(char16_t field) const {
2134 UDateFormatField index = DateFormatSymbols::getPatternCharIndex(field);
2135 if (index == UDAT_FIELD_COUNT) {
2136 return nullptr;
2137 }
2138 return getNumberFormatByIndex(index);
2139 }
2140
2141 //----------------------------------------------------------------------
2142 void
zeroPaddingNumber(const NumberFormat * currentNumberFormat,UnicodeString & appendTo,int32_t value,int32_t minDigits,int32_t maxDigits) const2143 SimpleDateFormat::zeroPaddingNumber(
2144 const NumberFormat *currentNumberFormat,
2145 UnicodeString &appendTo,
2146 int32_t value, int32_t minDigits, int32_t maxDigits) const
2147 {
2148
2149 if (currentNumberFormat == fNumberFormat && fSimpleNumberFormatter) {
2150 // Can use fast path
2151 UErrorCode localStatus = U_ZERO_ERROR;
2152 number::SimpleNumber number = number::SimpleNumber::forInt64(value, localStatus);
2153 number.setMinimumIntegerDigits(minDigits, localStatus);
2154 number.truncateStart(maxDigits, localStatus);
2155
2156 number::FormattedNumber result = fSimpleNumberFormatter->format(std::move(number), localStatus);
2157 if (U_FAILURE(localStatus)) {
2158 return;
2159 }
2160 appendTo.append(result.toTempString(localStatus));
2161 return;
2162 }
2163
2164 // Check for RBNF (no clone necessary)
2165 auto* rbnf = dynamic_cast<const RuleBasedNumberFormat*>(currentNumberFormat);
2166 if (rbnf != nullptr) {
2167 FieldPosition pos(FieldPosition::DONT_CARE);
2168 rbnf->format(value, appendTo, pos); // 3rd arg is there to speed up processing
2169 return;
2170 }
2171
2172 // Fall back to slow path (clone and mutate the NumberFormat)
2173 if (currentNumberFormat != nullptr) {
2174 FieldPosition pos(FieldPosition::DONT_CARE);
2175 LocalPointer<NumberFormat> nf(currentNumberFormat->clone());
2176 nf->setMinimumIntegerDigits(minDigits);
2177 nf->setMaximumIntegerDigits(maxDigits);
2178 nf->format(value, appendTo, pos); // 3rd arg is there to speed up processing
2179 }
2180 }
2181
2182 //----------------------------------------------------------------------
2183
2184 /**
2185 * Return true if the given format character, occurring count
2186 * times, represents a numeric field.
2187 */
isNumeric(char16_t formatChar,int32_t count)2188 UBool SimpleDateFormat::isNumeric(char16_t formatChar, int32_t count) {
2189 return DateFormatSymbols::isNumericPatternChar(formatChar, count);
2190 }
2191
2192 UBool
isAtNumericField(const UnicodeString & pattern,int32_t patternOffset)2193 SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2194 if (patternOffset >= pattern.length()) {
2195 // not at any field
2196 return false;
2197 }
2198 char16_t ch = pattern.charAt(patternOffset);
2199 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2200 if (f == UDAT_FIELD_COUNT) {
2201 // not at any field
2202 return false;
2203 }
2204 int32_t i = patternOffset;
2205 while (pattern.charAt(++i) == ch) {}
2206 return DateFormatSymbols::isNumericField(f, i - patternOffset);
2207 }
2208
2209 UBool
isAfterNonNumericField(const UnicodeString & pattern,int32_t patternOffset)2210 SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2211 if (patternOffset <= 0) {
2212 // not after any field
2213 return false;
2214 }
2215 char16_t ch = pattern.charAt(--patternOffset);
2216 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2217 if (f == UDAT_FIELD_COUNT) {
2218 // not after any field
2219 return false;
2220 }
2221 int32_t i = patternOffset;
2222 while (pattern.charAt(--i) == ch) {}
2223 return !DateFormatSymbols::isNumericField(f, patternOffset - i);
2224 }
2225
2226 void
parse(const UnicodeString & text,Calendar & cal,ParsePosition & parsePos) const2227 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
2228 {
2229 UErrorCode status = U_ZERO_ERROR;
2230 int32_t pos = parsePos.getIndex();
2231 if(parsePos.getIndex() < 0) {
2232 parsePos.setErrorIndex(0);
2233 return;
2234 }
2235 int32_t start = pos;
2236
2237 // Hold the day period until everything else is parsed, because we need
2238 // the hour to interpret time correctly.
2239 int32_t dayPeriodInt = -1;
2240
2241 UBool ambiguousYear[] = { false };
2242 int32_t saveHebrewMonth = -1;
2243 int32_t count = 0;
2244 UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2245
2246 // For parsing abutting numeric fields. 'abutPat' is the
2247 // offset into 'pattern' of the first of 2 or more abutting
2248 // numeric fields. 'abutStart' is the offset into 'text'
2249 // where parsing the fields begins. 'abutPass' starts off as 0
2250 // and increments each time we try to parse the fields.
2251 int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
2252 int32_t abutStart = 0;
2253 int32_t abutPass = 0;
2254 UBool inQuote = false;
2255
2256 MessageFormat * numericLeapMonthFormatter = nullptr;
2257
2258 Calendar* calClone = nullptr;
2259 Calendar *workCal = &cal;
2260 if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
2261 // Different calendar type
2262 // We use the time/zone from the input calendar, but
2263 // do not use the input calendar for field calculation.
2264 calClone = fCalendar->clone();
2265 if (calClone != nullptr) {
2266 calClone->setTime(cal.getTime(status),status);
2267 if (U_FAILURE(status)) {
2268 goto ExitParse;
2269 }
2270 calClone->setTimeZone(cal.getTimeZone());
2271 workCal = calClone;
2272 } else {
2273 status = U_MEMORY_ALLOCATION_ERROR;
2274 goto ExitParse;
2275 }
2276 }
2277
2278 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
2279 numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, status);
2280 if (numericLeapMonthFormatter == nullptr) {
2281 status = U_MEMORY_ALLOCATION_ERROR;
2282 goto ExitParse;
2283 } else if (U_FAILURE(status)) {
2284 goto ExitParse; // this will delete numericLeapMonthFormatter
2285 }
2286 }
2287
2288 for (int32_t i=0; i<fPattern.length(); ++i) {
2289 char16_t ch = fPattern.charAt(i);
2290
2291 // Handle alphabetic field characters.
2292 if (!inQuote && isSyntaxChar(ch)) {
2293 int32_t fieldPat = i;
2294
2295 // Count the length of this field specifier
2296 count = 1;
2297 while ((i+1)<fPattern.length() &&
2298 fPattern.charAt(i+1) == ch) {
2299 ++count;
2300 ++i;
2301 }
2302
2303 if (isNumeric(ch, count)) {
2304 if (abutPat < 0) {
2305 // Determine if there is an abutting numeric field.
2306 // Record the start of a set of abutting numeric fields.
2307 if (isAtNumericField(fPattern, i + 1)) {
2308 abutPat = fieldPat;
2309 abutStart = pos;
2310 abutPass = 0;
2311 }
2312 }
2313 } else {
2314 abutPat = -1; // End of any abutting fields
2315 }
2316
2317 // Handle fields within a run of abutting numeric fields. Take
2318 // the pattern "HHmmss" as an example. We will try to parse
2319 // 2/2/2 characters of the input text, then if that fails,
2320 // 1/2/2. We only adjust the width of the leftmost field; the
2321 // others remain fixed. This allows "123456" => 12:34:56, but
2322 // "12345" => 1:23:45. Likewise, for the pattern "yyyyMMdd" we
2323 // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
2324 if (abutPat >= 0) {
2325 // If we are at the start of a run of abutting fields, then
2326 // shorten this field in each pass. If we can't shorten
2327 // this field any more, then the parse of this set of
2328 // abutting numeric fields has failed.
2329 if (fieldPat == abutPat) {
2330 count -= abutPass++;
2331 if (count == 0) {
2332 status = U_PARSE_ERROR;
2333 goto ExitParse;
2334 }
2335 }
2336
2337 pos = subParse(text, pos, ch, count,
2338 true, false, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType);
2339
2340 // If the parse fails anywhere in the run, back up to the
2341 // start of the run and retry.
2342 if (pos < 0) {
2343 i = abutPat - 1;
2344 pos = abutStart;
2345 continue;
2346 }
2347 }
2348
2349 // Handle non-numeric fields and non-abutting numeric
2350 // fields.
2351 else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
2352 int32_t s = subParse(text, pos, ch, count,
2353 false, true, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType, &dayPeriodInt);
2354
2355 if (s == -pos-1) {
2356 // era not present, in special cases allow this to continue
2357 // from the position where the era was expected
2358 s = pos;
2359
2360 if (i+1 < fPattern.length()) {
2361 // move to next pattern character
2362 char16_t c = fPattern.charAt(i+1);
2363
2364 // check for whitespace
2365 if (PatternProps::isWhiteSpace(c)) {
2366 i++;
2367 // Advance over run in pattern
2368 while ((i+1)<fPattern.length() &&
2369 PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
2370 ++i;
2371 }
2372 }
2373 }
2374 }
2375 else if (s <= 0) {
2376 status = U_PARSE_ERROR;
2377 goto ExitParse;
2378 }
2379 pos = s;
2380 }
2381 }
2382
2383 // Handle literal pattern characters. These are any
2384 // quoted characters and non-alphabetic unquoted
2385 // characters.
2386 else {
2387
2388 abutPat = -1; // End of any abutting fields
2389
2390 if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status), getBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, status), isLenient())) {
2391 status = U_PARSE_ERROR;
2392 goto ExitParse;
2393 }
2394 }
2395 }
2396
2397 // Special hack for trailing "." after non-numeric field.
2398 if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
2399 // only do if the last field is not numeric
2400 if (isAfterNonNumericField(fPattern, fPattern.length())) {
2401 pos++; // skip the extra "."
2402 }
2403 }
2404
2405 // If dayPeriod is set, use it in conjunction with hour-of-day to determine am/pm.
2406 if (dayPeriodInt >= 0) {
2407 DayPeriodRules::DayPeriod dayPeriod = (DayPeriodRules::DayPeriod)dayPeriodInt;
2408 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
2409
2410 if (!cal.isSet(UCAL_HOUR) && !cal.isSet(UCAL_HOUR_OF_DAY)) {
2411 // If hour is not set, set time to the midpoint of current day period, overwriting
2412 // minutes if it's set.
2413 double midPoint = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2414
2415 // If we can't get midPoint we do nothing.
2416 if (U_SUCCESS(status)) {
2417 // Truncate midPoint toward zero to get the hour.
2418 // Any leftover means it was a half-hour.
2419 int32_t midPointHour = (int32_t) midPoint;
2420 int32_t midPointMinute = (midPoint - midPointHour) > 0 ? 30 : 0;
2421
2422 // No need to set am/pm because hour-of-day is set last therefore takes precedence.
2423 cal.set(UCAL_HOUR_OF_DAY, midPointHour);
2424 cal.set(UCAL_MINUTE, midPointMinute);
2425 }
2426 } else {
2427 int hourOfDay;
2428
2429 if (cal.isSet(UCAL_HOUR_OF_DAY)) { // Hour is parsed in 24-hour format.
2430 hourOfDay = cal.get(UCAL_HOUR_OF_DAY, status);
2431 } else { // Hour is parsed in 12-hour format.
2432 hourOfDay = cal.get(UCAL_HOUR, status);
2433 // cal.get() turns 12 to 0 for 12-hour time; change 0 to 12
2434 // so 0 unambiguously means a 24-hour time from above.
2435 if (hourOfDay == 0) { hourOfDay = 12; }
2436 }
2437 U_ASSERT(0 <= hourOfDay && hourOfDay <= 23);
2438
2439
// If hour-of-day is 0 or 13 thru 23 then input time is unambiguously in 24-hour format.
2441 if (hourOfDay == 0 || (13 <= hourOfDay && hourOfDay <= 23)) {
2442 // Make hour-of-day take precedence over (hour + am/pm) by setting it again.
2443 cal.set(UCAL_HOUR_OF_DAY, hourOfDay);
2444 } else {
2445 // We have a 12-hour time and need to choose between am and pm.
2446 // Behave as if dayPeriod spanned 6 hours each way from its center point.
2447 // This will parse correctly for consistent time + period (e.g. 10 at night) as
2448 // well as provide a reasonable recovery for inconsistent time + period (e.g.
2449 // 9 in the afternoon).
2450
2451 // Assume current time is in the AM.
2452 // - Change 12 back to 0 for easier handling of 12am.
2453 // - Append minutes as fractional hours because e.g. 8:15 and 8:45 could be parsed
2454 // into different half-days if center of dayPeriod is at 14:30.
2455 // - cal.get(MINUTE) will return 0 if MINUTE is unset, which works.
2456 if (hourOfDay == 12) { hourOfDay = 0; }
2457 double currentHour = hourOfDay + (cal.get(UCAL_MINUTE, status)) / 60.0;
2458 double midPointHour = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2459
2460 if (U_SUCCESS(status)) {
2461 double hoursAheadMidPoint = currentHour - midPointHour;
2462
2463 // Assume current time is in the AM.
2464 if (-6 <= hoursAheadMidPoint && hoursAheadMidPoint < 6) {
2465 // Assumption holds; set time as such.
2466 cal.set(UCAL_AM_PM, 0);
2467 } else {
2468 cal.set(UCAL_AM_PM, 1);
2469 }
2470 }
2471 }
2472 }
2473 }
2474
2475 // At this point the fields of Calendar have been set. Calendar
2476 // will fill in default values for missing fields when the time
2477 // is computed.
2478
2479 parsePos.setIndex(pos);
2480
2481 // This part is a problem: When we call parsedDate.after, we compute the time.
2482 // Take the date April 3 2004 at 2:30 am. When this is first set up, the year
2483 // will be wrong if we're parsing a 2-digit year pattern. It will be 1904.
2484 // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day. 2:30 am
2485 // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
2486 // on that day. It is therefore parsed out to fields as 3:30 am. Then we
2487 // add 100 years, and get April 3 2004 at 3:30 am. Note that April 3 2004 is
2488 // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
2489 /*
2490 UDate parsedDate = calendar.getTime();
2491 if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
2492 calendar.add(Calendar.YEAR, 100);
2493 parsedDate = calendar.getTime();
2494 }
2495 */
2496 // Because of the above condition, save off the fields in case we need to readjust.
2497 // The procedure we use here is not particularly efficient, but there is no other
2498 // way to do this given the API restrictions present in Calendar. We minimize
2499 // inefficiency by only performing this computation when it might apply, that is,
2500 // when the two-digit year is equal to the start year, and thus might fall at the
2501 // front or the back of the default century. This only works because we adjust
2502 // the year correctly to start with in other cases -- see subParse().
2503 if (ambiguousYear[0] || tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) // If this is true then the two-digit year == the default start year
2504 {
2505 // We need a copy of the fields, and we need to avoid triggering a call to
2506 // complete(), which will recalculate the fields. Since we can't access
2507 // the fields[] array in Calendar, we clone the entire object. This will
2508 // stop working if Calendar.clone() is ever rewritten to call complete().
2509 Calendar *copy;
2510 if (ambiguousYear[0]) {
2511 copy = cal.clone();
2512 // Check for failed cloning.
2513 if (copy == nullptr) {
2514 status = U_MEMORY_ALLOCATION_ERROR;
2515 goto ExitParse;
2516 }
2517 UDate parsedDate = copy->getTime(status);
2518 // {sfb} check internalGetDefaultCenturyStart
2519 if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
2520 // We can't use add here because that does a complete() first.
2521 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
2522 }
2523 delete copy;
2524 }
2525
2526 if (tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) {
2527 copy = cal.clone();
2528 // Check for failed cloning.
2529 if (copy == nullptr) {
2530 status = U_MEMORY_ALLOCATION_ERROR;
2531 goto ExitParse;
2532 }
2533 const TimeZone & tz = cal.getTimeZone();
2534 BasicTimeZone *btz = nullptr;
2535
2536 if (dynamic_cast<const OlsonTimeZone *>(&tz) != nullptr
2537 || dynamic_cast<const SimpleTimeZone *>(&tz) != nullptr
2538 || dynamic_cast<const RuleBasedTimeZone *>(&tz) != nullptr
2539 || dynamic_cast<const VTimeZone *>(&tz) != nullptr) {
2540 btz = (BasicTimeZone*)&tz;
2541 }
2542
2543 // Get local millis
2544 copy->set(UCAL_ZONE_OFFSET, 0);
2545 copy->set(UCAL_DST_OFFSET, 0);
2546 UDate localMillis = copy->getTime(status);
2547
2548 // Make sure parsed time zone type (Standard or Daylight)
2549 // matches the rule used by the parsed time zone.
2550 int32_t raw, dst;
2551 if (btz != nullptr) {
2552 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2553 btz->getOffsetFromLocal(localMillis,
2554 UCAL_TZ_LOCAL_STANDARD_FORMER, UCAL_TZ_LOCAL_STANDARD_LATTER, raw, dst, status);
2555 } else {
2556 btz->getOffsetFromLocal(localMillis,
2557 UCAL_TZ_LOCAL_DAYLIGHT_FORMER, UCAL_TZ_LOCAL_DAYLIGHT_LATTER, raw, dst, status);
2558 }
2559 } else {
2560 // No good way to resolve ambiguous time at transition,
2561 // but following code work in most case.
2562 tz.getOffset(localMillis, true, raw, dst, status);
2563 }
2564
2565 // Now, compare the results with parsed type, either standard or daylight saving time
2566 int32_t resolvedSavings = dst;
2567 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2568 if (dst != 0) {
2569 // Override DST_OFFSET = 0 in the result calendar
2570 resolvedSavings = 0;
2571 }
2572 } else { // tztype == TZTYPE_DST
2573 if (dst == 0) {
2574 if (btz != nullptr) {
2575 // This implementation resolves daylight saving time offset
2576 // closest rule after the given time.
2577 UDate baseTime = localMillis + raw;
2578 UDate time = baseTime;
2579 UDate limit = baseTime + MAX_DAYLIGHT_DETECTION_RANGE;
2580 TimeZoneTransition trs;
2581 UBool trsAvail;
2582
2583 // Search for DST rule after the given time
2584 while (time < limit) {
2585 trsAvail = btz->getNextTransition(time, false, trs);
2586 if (!trsAvail) {
2587 break;
2588 }
2589 resolvedSavings = trs.getTo()->getDSTSavings();
2590 if (resolvedSavings != 0) {
2591 break;
2592 }
2593 time = trs.getTime();
2594 }
2595
2596 if (resolvedSavings == 0) {
2597 // If no DST rule after the given time was found, search for
2598 // DST rule before.
2599 time = baseTime;
2600 limit = baseTime - MAX_DAYLIGHT_DETECTION_RANGE;
2601 while (time > limit) {
2602 trsAvail = btz->getPreviousTransition(time, true, trs);
2603 if (!trsAvail) {
2604 break;
2605 }
2606 resolvedSavings = trs.getFrom()->getDSTSavings();
2607 if (resolvedSavings != 0) {
2608 break;
2609 }
2610 time = trs.getTime() - 1;
2611 }
2612
2613 if (resolvedSavings == 0) {
2614 resolvedSavings = btz->getDSTSavings();
2615 }
2616 }
2617 } else {
2618 resolvedSavings = tz.getDSTSavings();
2619 }
2620 if (resolvedSavings == 0) {
2621 // final fallback
2622 resolvedSavings = U_MILLIS_PER_HOUR;
2623 }
2624 }
2625 }
2626 cal.set(UCAL_ZONE_OFFSET, raw);
2627 cal.set(UCAL_DST_OFFSET, resolvedSavings);
2628 delete copy;
2629 }
2630 }
2631 ExitParse:
2632 // Set the parsed result if local calendar is used
2633 // instead of the input calendar
2634 if (U_SUCCESS(status) && workCal != &cal) {
2635 cal.setTimeZone(workCal->getTimeZone());
2636 cal.setTime(workCal->getTime(status), status);
2637 }
2638
2639 if (numericLeapMonthFormatter != nullptr) {
2640 delete numericLeapMonthFormatter;
2641 }
2642 if (calClone != nullptr) {
2643 delete calClone;
2644 }
2645
2646 // If any Calendar calls failed, we pretend that we
2647 // couldn't parse the string, when in reality this isn't quite accurate--
2648 // we did parse it; the Calendar calls just failed.
2649 if (U_FAILURE(status)) {
2650 parsePos.setErrorIndex(pos);
2651 parsePos.setIndex(start);
2652 }
2653 }
2654
2655 //----------------------------------------------------------------------
2656
2657 static int32_t
2658 matchStringWithOptionalDot(const UnicodeString &text,
2659 int32_t index,
2660 const UnicodeString &data);
2661
matchQuarterString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,Calendar & cal) const2662 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
2663 int32_t start,
2664 UCalendarDateFields field,
2665 const UnicodeString* data,
2666 int32_t dataCount,
2667 Calendar& cal) const
2668 {
2669 int32_t i = 0;
2670 int32_t count = dataCount;
2671
2672 // There may be multiple strings in the data[] array which begin with
2673 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2674 // We keep track of the longest match, and return that. Note that this
2675 // unfortunately requires us to test all array elements.
2676 int32_t bestMatchLength = 0, bestMatch = -1;
2677 UnicodeString bestMatchName;
2678
2679 for (; i < count; ++i) {
2680 int32_t matchLength = 0;
2681 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2682 bestMatchLength = matchLength;
2683 bestMatch = i;
2684 }
2685 }
2686
2687 if (bestMatch >= 0) {
2688 cal.set(field, bestMatch * 3);
2689 return start + bestMatchLength;
2690 }
2691
2692 return -start;
2693 }
2694
matchDayPeriodStrings(const UnicodeString & text,int32_t start,const UnicodeString * data,int32_t dataCount,int32_t & dayPeriod) const2695 int32_t SimpleDateFormat::matchDayPeriodStrings(const UnicodeString& text, int32_t start,
2696 const UnicodeString* data, int32_t dataCount,
2697 int32_t &dayPeriod) const
2698 {
2699
2700 int32_t bestMatchLength = 0, bestMatch = -1;
2701
2702 for (int32_t i = 0; i < dataCount; ++i) {
2703 int32_t matchLength = 0;
2704 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2705 bestMatchLength = matchLength;
2706 bestMatch = i;
2707 }
2708 }
2709
2710 if (bestMatch >= 0) {
2711 dayPeriod = bestMatch;
2712 return start + bestMatchLength;
2713 }
2714
2715 return -start;
2716 }
2717
2718 //----------------------------------------------------------------------
matchLiterals(const UnicodeString & pattern,int32_t & patternOffset,const UnicodeString & text,int32_t & textOffset,UBool whitespaceLenient,UBool partialMatchLenient,UBool oldLeniency)2719 UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
2720 int32_t &patternOffset,
2721 const UnicodeString &text,
2722 int32_t &textOffset,
2723 UBool whitespaceLenient,
2724 UBool partialMatchLenient,
2725 UBool oldLeniency)
2726 {
2727 UBool inQuote = false;
2728 UnicodeString literal;
2729 int32_t i = patternOffset;
2730
2731 // scan pattern looking for contiguous literal characters
2732 for ( ; i < pattern.length(); i += 1) {
2733 char16_t ch = pattern.charAt(i);
2734
2735 if (!inQuote && isSyntaxChar(ch)) {
2736 break;
2737 }
2738
2739 if (ch == QUOTE) {
2740 // Match a quote literal ('') inside OR outside of quotes
2741 if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
2742 i += 1;
2743 } else {
2744 inQuote = !inQuote;
2745 continue;
2746 }
2747 }
2748
2749 literal += ch;
2750 }
2751
2752 // at this point, literal contains the literal text
2753 // and i is the index of the next non-literal pattern character.
2754 int32_t p;
2755 int32_t t = textOffset;
2756
2757 if (whitespaceLenient) {
2758 // trim leading, trailing whitespace from
2759 // the literal text
2760 literal.trim();
2761
2762 // ignore any leading whitespace in the text
2763 while (t < text.length() && u_isWhitespace(text.charAt(t))) {
2764 t += 1;
2765 }
2766 }
2767
2768 for (p = 0; p < literal.length() && t < text.length();) {
2769 UBool needWhitespace = false;
2770
2771 while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) {
2772 needWhitespace = true;
2773 p += 1;
2774 }
2775
2776 if (needWhitespace) {
2777 int32_t tStart = t;
2778
2779 while (t < text.length()) {
2780 char16_t tch = text.charAt(t);
2781
2782 if (!u_isUWhiteSpace(tch) && !PatternProps::isWhiteSpace(tch)) {
2783 break;
2784 }
2785
2786 t += 1;
2787 }
2788
2789 // TODO: should we require internal spaces
2790 // in lenient mode? (There won't be any
2791 // leading or trailing spaces)
2792 if (!whitespaceLenient && t == tStart) {
2793 // didn't find matching whitespace:
2794 // an error in strict mode
2795 return false;
2796 }
2797
2798 // In strict mode, this run of whitespace
2799 // may have been at the end.
2800 if (p >= literal.length()) {
2801 break;
2802 }
2803 }
2804 if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
2805 // Ran out of text, or found a non-matching character:
2806 // OK in lenient mode, an error in strict mode.
2807 if (whitespaceLenient) {
2808 if (t == textOffset && text.charAt(t) == 0x2e &&
2809 isAfterNonNumericField(pattern, patternOffset)) {
2810 // Lenient mode and the literal input text begins with a "." and
2811 // we are after a non-numeric field: We skip the "."
2812 ++t;
2813 continue; // Do not update p.
2814 }
2815 // if it is actual whitespace and we're whitespace lenient it's OK
2816
2817 char16_t wsc = text.charAt(t);
2818 if(PatternProps::isWhiteSpace(wsc)) {
2819 // Lenient mode and it's just whitespace we skip it
2820 ++t;
2821 continue; // Do not update p.
2822 }
2823 }
2824 // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for partial matches
2825 if(partialMatchLenient && oldLeniency) {
2826 break;
2827 }
2828
2829 return false;
2830 }
2831 ++p;
2832 ++t;
2833 }
2834
2835 // At this point if we're in strict mode we have a complete match.
2836 // If we're in lenient mode we may have a partial match, or no
2837 // match at all.
2838 if (p <= 0) {
2839 // no match. Pretend it matched a run of whitespace
2840 // and ignorables in the text.
2841 const UnicodeSet *ignorables = nullptr;
2842 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i));
2843 if (patternCharIndex != UDAT_FIELD_COUNT) {
2844 ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
2845 }
2846
2847 for (t = textOffset; t < text.length(); t += 1) {
2848 char16_t ch = text.charAt(t);
2849
2850 if (ignorables == nullptr || !ignorables->contains(ch)) {
2851 break;
2852 }
2853 }
2854 }
2855
2856 // if we get here, we've got a complete match.
2857 patternOffset = i - 1;
2858 textOffset = t;
2859
2860 return true;
2861 }
2862
2863 //----------------------------------------------------------------------
2864 // check both wide and abbrev months.
2865 // Does not currently handle monthPattern.
2866 // UCalendarDateFields field = UCAL_MONTH
2867
matchAlphaMonthStrings(const UnicodeString & text,int32_t start,const UnicodeString * wideData,const UnicodeString * shortData,int32_t dataCount,Calendar & cal) const2868 int32_t SimpleDateFormat::matchAlphaMonthStrings(const UnicodeString& text,
2869 int32_t start,
2870 const UnicodeString* wideData,
2871 const UnicodeString* shortData,
2872 int32_t dataCount,
2873 Calendar& cal) const
2874 {
2875 int32_t i;
2876 int32_t bestMatchLength = 0, bestMatch = -1;
2877
2878 for (i = 0; i < dataCount; ++i) {
2879 int32_t matchLen = 0;
2880 if ((matchLen = matchStringWithOptionalDot(text, start, wideData[i])) > bestMatchLength) {
2881 bestMatch = i;
2882 bestMatchLength = matchLen;
2883 }
2884 }
2885 for (i = 0; i < dataCount; ++i) {
2886 int32_t matchLen = 0;
2887 if ((matchLen = matchStringWithOptionalDot(text, start, shortData[i])) > bestMatchLength) {
2888 bestMatch = i;
2889 bestMatchLength = matchLen;
2890 }
2891 }
2892
2893 if (bestMatch >= 0) {
2894 // Adjustment for Hebrew Calendar month Adar II
2895 if (!strcmp(cal.getType(),"hebrew") && bestMatch==13) {
2896 cal.set(UCAL_MONTH,6);
2897 } else {
2898 cal.set(UCAL_MONTH, bestMatch);
2899 }
2900 return start + bestMatchLength;
2901 }
2902
2903 return -start;
2904 }
2905
2906 //----------------------------------------------------------------------
2907
matchString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,const UnicodeString * monthPattern,Calendar & cal) const2908 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
2909 int32_t start,
2910 UCalendarDateFields field,
2911 const UnicodeString* data,
2912 int32_t dataCount,
2913 const UnicodeString* monthPattern,
2914 Calendar& cal) const
2915 {
2916 int32_t i = 0;
2917 int32_t count = dataCount;
2918
2919 if (field == UCAL_DAY_OF_WEEK) i = 1;
2920
2921 // There may be multiple strings in the data[] array which begin with
2922 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2923 // We keep track of the longest match, and return that. Note that this
2924 // unfortunately requires us to test all array elements.
2925 // But this does not really work for cases such as Chuvash in which
2926 // May is "ҫу" and August is "ҫурла"/"ҫур.", hence matchAlphaMonthStrings.
2927 int32_t bestMatchLength = 0, bestMatch = -1;
2928 UnicodeString bestMatchName;
2929 int32_t isLeapMonth = 0;
2930
2931 for (; i < count; ++i) {
2932 int32_t matchLen = 0;
2933 if ((matchLen = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2934 bestMatch = i;
2935 bestMatchLength = matchLen;
2936 }
2937
2938 if (monthPattern != nullptr) {
2939 UErrorCode status = U_ZERO_ERROR;
2940 UnicodeString leapMonthName;
2941 SimpleFormatter(*monthPattern, 1, 1, status).format(data[i], leapMonthName, status);
2942 if (U_SUCCESS(status)) {
2943 if ((matchLen = matchStringWithOptionalDot(text, start, leapMonthName)) > bestMatchLength) {
2944 bestMatch = i;
2945 bestMatchLength = matchLen;
2946 isLeapMonth = 1;
2947 }
2948 }
2949 }
2950 }
2951
2952 if (bestMatch >= 0) {
2953 if (field < UCAL_FIELD_COUNT) {
2954 // Adjustment for Hebrew Calendar month Adar II
2955 if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) {
2956 cal.set(field,6);
2957 } else {
2958 if (field == UCAL_YEAR) {
2959 bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60
2960 }
2961 cal.set(field, bestMatch);
2962 }
2963 if (monthPattern != nullptr) {
2964 cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth);
2965 }
2966 }
2967
2968 return start + bestMatchLength;
2969 }
2970
2971 return -start;
2972 }
2973
2974 static int32_t
matchStringWithOptionalDot(const UnicodeString & text,int32_t index,const UnicodeString & data)2975 matchStringWithOptionalDot(const UnicodeString &text,
2976 int32_t index,
2977 const UnicodeString &data) {
2978 UErrorCode sts = U_ZERO_ERROR;
2979 int32_t matchLenText = 0;
2980 int32_t matchLenData = 0;
2981
2982 u_caseInsensitivePrefixMatch(text.getBuffer() + index, text.length() - index,
2983 data.getBuffer(), data.length(),
2984 0 /* default case option */,
2985 &matchLenText, &matchLenData,
2986 &sts);
2987 U_ASSERT (U_SUCCESS(sts));
2988
2989 if (matchLenData == data.length() /* normal match */
2990 || (data.charAt(data.length() - 1) == 0x2e
2991 && matchLenData == data.length() - 1 /* match without trailing dot */)) {
2992 return matchLenText;
2993 }
2994
2995 return 0;
2996 }
2997
2998 //----------------------------------------------------------------------
2999
3000 void
set2DigitYearStart(UDate d,UErrorCode & status)3001 SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
3002 {
3003 parseAmbiguousDatesAsAfter(d, status);
3004 }
3005
3006 /**
3007 * Private member function that converts the parsed date strings into
3008 * timeFields. Returns -start (for ParsePosition) if failed.
3009 */
subParse(const UnicodeString & text,int32_t & start,char16_t ch,int32_t count,UBool obeyCount,UBool allowNegative,UBool ambiguousYear[],int32_t & saveHebrewMonth,Calendar & cal,int32_t patLoc,MessageFormat * numericLeapMonthFormatter,UTimeZoneFormatTimeType * tzTimeType,int32_t * dayPeriod) const3010 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count,
3011 UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
3012 int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType,
3013 int32_t *dayPeriod) const
3014 {
3015 Formattable number;
3016 int32_t value = 0;
3017 int32_t i;
3018 int32_t ps = 0;
3019 UErrorCode status = U_ZERO_ERROR;
3020 ParsePosition pos(0);
3021 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
3022 const NumberFormat *currentNumberFormat;
3023 UnicodeString temp;
3024 UBool gotNumber = false;
3025
3026 #if defined (U_DEBUG_CAL)
3027 //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start);
3028 #endif
3029
3030 if (patternCharIndex == UDAT_FIELD_COUNT) {
3031 return -start;
3032 }
3033
3034 currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
3035 if (currentNumberFormat == nullptr) {
3036 return -start;
3037 }
3038 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; // UCAL_FIELD_COUNT if irrelevant
3039 UnicodeString hebr("hebr", 4, US_INV);
3040
3041 if (numericLeapMonthFormatter != nullptr) {
3042 numericLeapMonthFormatter->setFormats((const Format **)¤tNumberFormat, 1);
3043 }
3044 UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0);
3045
3046 // If there are any spaces here, skip over them. If we hit the end
3047 // of the string, then fail.
3048 for (;;) {
3049 if (start >= text.length()) {
3050 return -start;
3051 }
3052 UChar32 c = text.char32At(start);
3053 if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
3054 break;
3055 }
3056 start += U16_LENGTH(c);
3057 }
3058 pos.setIndex(start);
3059
3060 // We handle a few special cases here where we need to parse
3061 // a number value. We handle further, more generic cases below. We need
3062 // to handle some of them here because some fields require extra processing on
3063 // the parsed value.
3064 if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k
3065 patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H
3066 patternCharIndex == UDAT_HOUR1_FIELD || // h
3067 patternCharIndex == UDAT_HOUR0_FIELD || // K
3068 (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e
3069 (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c
3070 (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M
3071 (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L
3072 (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q
3073 (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q
3074 patternCharIndex == UDAT_YEAR_FIELD || // y
3075 patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y
3076 patternCharIndex == UDAT_YEAR_NAME_FIELD || // U (falls back to numeric)
3077 (patternCharIndex == UDAT_ERA_FIELD && isChineseCalendar) || // G
3078 patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S
3079 {
3080 int32_t parseStart = pos.getIndex();
3081 // It would be good to unify this with the obeyCount logic below,
3082 // but that's going to be difficult.
3083 const UnicodeString* src;
3084
3085 UBool parsedNumericLeapMonth = false;
3086 if (numericLeapMonthFormatter != nullptr && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) {
3087 int32_t argCount;
3088 Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount);
3089 if (args != nullptr && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) {
3090 parsedNumericLeapMonth = true;
3091 number.setLong(args[0].getLong());
3092 cal.set(UCAL_IS_LEAP_MONTH, 1);
3093 delete[] args;
3094 } else {
3095 pos.setIndex(parseStart);
3096 cal.set(UCAL_IS_LEAP_MONTH, 0);
3097 }
3098 }
3099
3100 if (!parsedNumericLeapMonth) {
3101 if (obeyCount) {
3102 if ((start+count) > text.length()) {
3103 return -start;
3104 }
3105
3106 text.extractBetween(0, start + count, temp);
3107 src = &temp;
3108 } else {
3109 src = &text;
3110 }
3111
3112 parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3113 }
3114
3115 int32_t txtLoc = pos.getIndex();
3116
3117 if (txtLoc > parseStart) {
3118 value = number.getLong();
3119 gotNumber = true;
3120
3121 // suffix processing
3122 if (value < 0 ) {
3123 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, true);
3124 if (txtLoc != pos.getIndex()) {
3125 value *= -1;
3126 }
3127 }
3128 else {
3129 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, false);
3130 }
3131
3132 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
3133 // Check the range of the value
3134 int32_t bias = gFieldRangeBias[patternCharIndex];
3135 if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
3136 return -start;
3137 }
3138 }
3139
3140 pos.setIndex(txtLoc);
3141 }
3142 }
3143
3144 // Make sure that we got a number if
3145 // we want one, and didn't get one
3146 // if we don't want one.
3147 switch (patternCharIndex) {
3148 case UDAT_HOUR_OF_DAY1_FIELD:
3149 case UDAT_HOUR_OF_DAY0_FIELD:
3150 case UDAT_HOUR1_FIELD:
3151 case UDAT_HOUR0_FIELD:
3152 // special range check for hours:
3153 if (value < 0 || value > 24) {
3154 return -start;
3155 }
3156
3157 // fall through to gotNumber check
3158 U_FALLTHROUGH;
3159 case UDAT_YEAR_FIELD:
3160 case UDAT_YEAR_WOY_FIELD:
3161 case UDAT_FRACTIONAL_SECOND_FIELD:
3162 // these must be a number
3163 if (! gotNumber) {
3164 return -start;
3165 }
3166
3167 break;
3168
3169 default:
3170 // we check the rest of the fields below.
3171 break;
3172 }
3173
3174 switch (patternCharIndex) {
3175 case UDAT_ERA_FIELD:
3176 if (isChineseCalendar) {
3177 if (!gotNumber) {
3178 return -start;
3179 }
3180 cal.set(UCAL_ERA, value);
3181 return pos.getIndex();
3182 }
3183 if (count == 5) {
3184 ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, nullptr, cal);
3185 } else if (count == 4) {
3186 ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, nullptr, cal);
3187 } else {
3188 ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, nullptr, cal);
3189 }
3190
3191 // check return position, if it equals -start, then matchString error
3192 // special case the return code so we don't necessarily fail out until we
3193 // verify no year information also
3194 if (ps == -start)
3195 ps--;
3196
3197 return ps;
3198
3199 case UDAT_YEAR_FIELD:
3200 // If there are 3 or more YEAR pattern characters, this indicates
3201 // that the year value is to be treated literally, without any
3202 // two-digit year adjustments (e.g., from "01" to 2001). Otherwise
3203 // we made adjustments to place the 2-digit year in the proper
3204 // century, for parsed strings from "00" to "99". Any other string
3205 // is treated literally: "2250", "-1", "1", "002".
3206 if (fDateOverride.compare(hebr)==0 && value < 1000) {
3207 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3208 } else if (text.moveIndex32(start, 2) == pos.getIndex() && !isChineseCalendar
3209 && u_isdigit(text.char32At(start))
3210 && u_isdigit(text.char32At(text.moveIndex32(start, 1))))
3211 {
3212 // only adjust year for patterns less than 3.
3213 if(count < 3) {
3214 // Assume for example that the defaultCenturyStart is 6/18/1903.
3215 // This means that two-digit years will be forced into the range
3216 // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02
3217 // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond
3218 // to 1904, 1905, etc. If the year is 03, then it is 2003 if the
3219 // other fields specify a date before 6/18, or 1903 if they specify a
3220 // date afterwards. As a result, 03 is an ambiguous year. All other
3221 // two-digit years are unambiguous.
3222 if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
3223 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3224 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3225 value += (fDefaultCenturyStartYear/100)*100 +
3226 (value < ambiguousTwoDigitYear ? 100 : 0);
3227 }
3228 }
3229 }
3230 cal.set(UCAL_YEAR, value);
3231
3232 // Delayed checking for adjustment of Hebrew month numbers in non-leap years.
3233 if (saveHebrewMonth >= 0) {
3234 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3235 if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) {
3236 cal.set(UCAL_MONTH,saveHebrewMonth);
3237 } else {
3238 cal.set(UCAL_MONTH,saveHebrewMonth-1);
3239 }
3240 saveHebrewMonth = -1;
3241 }
3242 return pos.getIndex();
3243
3244 case UDAT_YEAR_WOY_FIELD:
3245 // Comment is the same as for UDAT_Year_FIELDs - look above
3246 if (fDateOverride.compare(hebr)==0 && value < 1000) {
3247 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3248 } else if (text.moveIndex32(start, 2) == pos.getIndex()
3249 && u_isdigit(text.char32At(start))
3250 && u_isdigit(text.char32At(text.moveIndex32(start, 1)))
3251 && fHaveDefaultCentury )
3252 {
3253 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3254 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3255 value += (fDefaultCenturyStartYear/100)*100 +
3256 (value < ambiguousTwoDigitYear ? 100 : 0);
3257 }
3258 cal.set(UCAL_YEAR_WOY, value);
3259 return pos.getIndex();
3260
3261 case UDAT_YEAR_NAME_FIELD:
3262 if (fSymbols->fShortYearNames != nullptr) {
3263 int32_t newStart = matchString(text, start, UCAL_YEAR, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount, nullptr, cal);
3264 if (newStart > 0) {
3265 return newStart;
3266 }
3267 }
3268 if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > fSymbols->fShortYearNamesCount)) {
3269 cal.set(UCAL_YEAR, value);
3270 return pos.getIndex();
3271 }
3272 return -start;
3273
3274 case UDAT_MONTH_FIELD:
3275 case UDAT_STANDALONE_MONTH_FIELD:
3276 if (gotNumber) // i.e., M or MM.
3277 {
3278 // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether
3279 // or not it was a leap year. We may or may not yet know what year it is, so might have to delay checking until
3280 // the year is parsed.
3281 if (!strcmp(cal.getType(),"hebrew")) {
3282 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3283 if (cal.isSet(UCAL_YEAR)) {
3284 UErrorCode monthStatus = U_ZERO_ERROR;
3285 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && value >= 6) {
3286 cal.set(UCAL_MONTH, value);
3287 } else {
3288 cal.set(UCAL_MONTH, value - 1);
3289 }
3290 } else {
3291 saveHebrewMonth = value;
3292 }
3293 } else {
3294 // Don't want to parse the month if it is a string
3295 // while pattern uses numeric style: M/MM, L/LL
3296 // [We computed 'value' above.]
3297 cal.set(UCAL_MONTH, value - 1);
3298 }
3299 return pos.getIndex();
3300 } else {
3301 // count >= 3 // i.e., MMM/MMMM, LLL/LLLL
3302 // Want to be able to parse both short and long forms.
3303 // Try count == 4 first:
3304 UnicodeString * wideMonthPat = nullptr;
3305 UnicodeString * shortMonthPat = nullptr;
3306 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
3307 if (patternCharIndex==UDAT_MONTH_FIELD) {
3308 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide];
3309 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev];
3310 } else {
3311 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide];
3312 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev];
3313 }
3314 }
3315 int32_t newStart = 0;
3316 if (patternCharIndex==UDAT_MONTH_FIELD) {
3317 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3318 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fMonthsCount==fSymbols->fShortMonthsCount) {
3319 // single function to check both wide and short, an experiment
3320 newStart = matchAlphaMonthStrings(text, start, fSymbols->fMonths, fSymbols->fShortMonths, fSymbols->fMonthsCount, cal); // try MMMM,MMM
3321 if (newStart > 0) {
3322 return newStart;
3323 }
3324 }
3325 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3326 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal); // try MMMM
3327 if (newStart > 0) {
3328 return newStart;
3329 }
3330 }
3331 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3332 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal); // try MMM
3333 }
3334 } else {
3335 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3336 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fStandaloneMonthsCount==fSymbols->fStandaloneShortMonthsCount) {
3337 // single function to check both wide and short, an experiment
3338 newStart = matchAlphaMonthStrings(text, start, fSymbols->fStandaloneMonths, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneMonthsCount, cal); // try MMMM,MMM
3339 if (newStart > 0) {
3340 return newStart;
3341 }
3342 }
3343 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3344 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal); // try LLLL
3345 if (newStart > 0) {
3346 return newStart;
3347 }
3348 }
3349 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3350 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL
3351 }
3352 }
3353 if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) // currently we do not try to parse MMMMM/LLLLL: #8860
3354 return newStart;
3355 // else we allowing parsing as number, below
3356 }
3357 break;
3358
3359 case UDAT_HOUR_OF_DAY1_FIELD:
3360 // [We computed 'value' above.]
3361 if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
3362 value = 0;
3363
3364 // fall through to set field
3365 U_FALLTHROUGH;
3366 case UDAT_HOUR_OF_DAY0_FIELD:
3367 cal.set(UCAL_HOUR_OF_DAY, value);
3368 return pos.getIndex();
3369
3370 case UDAT_FRACTIONAL_SECOND_FIELD:
3371 // Fractional seconds left-justify
3372 i = countDigits(text, start, pos.getIndex());
3373 if (i < 3) {
3374 while (i < 3) {
3375 value *= 10;
3376 i++;
3377 }
3378 } else {
3379 int32_t a = 1;
3380 while (i > 3) {
3381 a *= 10;
3382 i--;
3383 }
3384 value /= a;
3385 }
3386 cal.set(UCAL_MILLISECOND, value);
3387 return pos.getIndex();
3388
3389 case UDAT_DOW_LOCAL_FIELD:
3390 if (gotNumber) // i.e., e or ee
3391 {
3392 // [We computed 'value' above.]
3393 cal.set(UCAL_DOW_LOCAL, value);
3394 return pos.getIndex();
3395 }
3396 // else for eee-eeeee fall through to handling of EEE-EEEEE
3397 // fall through, do not break here
3398 U_FALLTHROUGH;
3399 case UDAT_DAY_OF_WEEK_FIELD:
3400 {
3401 // Want to be able to parse both short and long forms.
3402 // Try count == 4 (EEEE) wide first:
3403 int32_t newStart = 0;
3404 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3405 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3406 fSymbols->fWeekdays, fSymbols->fWeekdaysCount, nullptr, cal)) > 0)
3407 return newStart;
3408 }
3409 // EEEE wide failed, now try EEE abbreviated
3410 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3411 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3412 fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, nullptr, cal)) > 0)
3413 return newStart;
3414 }
3415 // EEE abbreviated failed, now try EEEEEE short
3416 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3417 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3418 fSymbols->fShorterWeekdays, fSymbols->fShorterWeekdaysCount, nullptr, cal)) > 0)
3419 return newStart;
3420 }
3421 // EEEEEE short failed, now try EEEEE narrow
3422 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3423 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3424 fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, nullptr, cal)) > 0)
3425 return newStart;
3426 }
3427 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
3428 return newStart;
3429 // else we allowing parsing as number, below
3430 }
3431 break;
3432
3433 case UDAT_STANDALONE_DAY_FIELD:
3434 {
3435 if (gotNumber) // c or cc
3436 {
3437 // [We computed 'value' above.]
3438 cal.set(UCAL_DOW_LOCAL, value);
3439 return pos.getIndex();
3440 }
3441 // Want to be able to parse both short and long forms.
3442 // Try count == 4 (cccc) first:
3443 int32_t newStart = 0;
3444 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3445 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3446 fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, nullptr, cal)) > 0)
3447 return newStart;
3448 }
3449 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3450 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3451 fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, nullptr, cal)) > 0)
3452 return newStart;
3453 }
3454 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3455 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3456 fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, nullptr, cal)) > 0)
3457 return newStart;
3458 }
3459 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3460 return newStart;
3461 // else we allowing parsing as number, below
3462 }
3463 break;
3464
3465 case UDAT_AM_PM_FIELD:
3466 {
3467 // optionally try both wide/abbrev and narrow forms
3468 int32_t newStart = 0;
3469 // try wide/abbrev
3470 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count < 5 ) {
3471 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, nullptr, cal)) > 0) {
3472 return newStart;
3473 }
3474 }
3475 // try narrow
3476 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count >= 5 ) {
3477 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fNarrowAmPms, fSymbols->fNarrowAmPmsCount, nullptr, cal)) > 0) {
3478 return newStart;
3479 }
3480 }
3481 // no matches for given options
3482 return -start;
3483 }
3484
3485 case UDAT_HOUR1_FIELD:
3486 // [We computed 'value' above.]
3487 if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
3488 value = 0;
3489
3490 // fall through to set field
3491 U_FALLTHROUGH;
3492 case UDAT_HOUR0_FIELD:
3493 cal.set(UCAL_HOUR, value);
3494 return pos.getIndex();
3495
3496 case UDAT_QUARTER_FIELD:
3497 if (gotNumber) // i.e., Q or QQ.
3498 {
3499 // Don't want to parse the month if it is a string
3500 // while pattern uses numeric style: Q or QQ.
3501 // [We computed 'value' above.]
3502 cal.set(UCAL_MONTH, (value - 1) * 3);
3503 return pos.getIndex();
3504 } else {
3505 // count >= 3 // i.e., QQQ or QQQQ
3506 // Want to be able to parse short, long, and narrow forms.
3507 // Try count == 4 first:
3508 int32_t newStart = 0;
3509
3510 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3511 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3512 fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
3513 return newStart;
3514 }
3515 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3516 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3517 fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
3518 return newStart;
3519 }
3520 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3521 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3522 fSymbols->fNarrowQuarters, fSymbols->fNarrowQuartersCount, cal)) > 0)
3523 return newStart;
3524 }
3525 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3526 return newStart;
3527 // else we allowing parsing as number, below
3528 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3529 return -start;
3530 }
3531 break;
3532
3533 case UDAT_STANDALONE_QUARTER_FIELD:
3534 if (gotNumber) // i.e., q or qq.
3535 {
3536 // Don't want to parse the month if it is a string
3537 // while pattern uses numeric style: q or q.
3538 // [We computed 'value' above.]
3539 cal.set(UCAL_MONTH, (value - 1) * 3);
3540 return pos.getIndex();
3541 } else {
3542 // count >= 3 // i.e., qqq or qqqq
3543 // Want to be able to parse both short and long forms.
3544 // Try count == 4 first:
3545 int32_t newStart = 0;
3546
3547 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3548 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3549 fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
3550 return newStart;
3551 }
3552 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3553 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3554 fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
3555 return newStart;
3556 }
3557 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3558 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3559 fSymbols->fStandaloneNarrowQuarters, fSymbols->fStandaloneNarrowQuartersCount, cal)) > 0)
3560 return newStart;
3561 }
3562 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3563 return newStart;
3564 // else we allowing parsing as number, below
3565 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3566 return -start;
3567 }
3568 break;
3569
3570 case UDAT_TIMEZONE_FIELD: // 'z'
3571 {
3572 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_SPECIFIC_SHORT : UTZFMT_STYLE_SPECIFIC_LONG;
3573 const TimeZoneFormat *tzfmt = tzFormat(status);
3574 if (U_SUCCESS(status)) {
3575 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3576 if (tz != nullptr) {
3577 cal.adoptTimeZone(tz);
3578 return pos.getIndex();
3579 }
3580 }
3581 return -start;
3582 }
3583 break;
3584 case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
3585 {
3586 UTimeZoneFormatStyle style = (count < 4) ?
3587 UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL : ((count == 5) ? UTZFMT_STYLE_ISO_EXTENDED_FULL: UTZFMT_STYLE_LOCALIZED_GMT);
3588 const TimeZoneFormat *tzfmt = tzFormat(status);
3589 if (U_SUCCESS(status)) {
3590 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3591 if (tz != nullptr) {
3592 cal.adoptTimeZone(tz);
3593 return pos.getIndex();
3594 }
3595 }
3596 return -start;
3597 }
3598 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
3599 {
3600 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_GENERIC_SHORT : UTZFMT_STYLE_GENERIC_LONG;
3601 const TimeZoneFormat *tzfmt = tzFormat(status);
3602 if (U_SUCCESS(status)) {
3603 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3604 if (tz != nullptr) {
3605 cal.adoptTimeZone(tz);
3606 return pos.getIndex();
3607 }
3608 }
3609 return -start;
3610 }
3611 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
3612 {
3613 UTimeZoneFormatStyle style;
3614 switch (count) {
3615 case 1:
3616 style = UTZFMT_STYLE_ZONE_ID_SHORT;
3617 break;
3618 case 2:
3619 style = UTZFMT_STYLE_ZONE_ID;
3620 break;
3621 case 3:
3622 style = UTZFMT_STYLE_EXEMPLAR_LOCATION;
3623 break;
3624 default:
3625 style = UTZFMT_STYLE_GENERIC_LOCATION;
3626 break;
3627 }
3628 const TimeZoneFormat *tzfmt = tzFormat(status);
3629 if (U_SUCCESS(status)) {
3630 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3631 if (tz != nullptr) {
3632 cal.adoptTimeZone(tz);
3633 return pos.getIndex();
3634 }
3635 }
3636 return -start;
3637 }
3638 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
3639 {
3640 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_LOCALIZED_GMT_SHORT : UTZFMT_STYLE_LOCALIZED_GMT;
3641 const TimeZoneFormat *tzfmt = tzFormat(status);
3642 if (U_SUCCESS(status)) {
3643 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3644 if (tz != nullptr) {
3645 cal.adoptTimeZone(tz);
3646 return pos.getIndex();
3647 }
3648 }
3649 return -start;
3650 }
3651 case UDAT_TIMEZONE_ISO_FIELD: // 'X'
3652 {
3653 UTimeZoneFormatStyle style;
3654 switch (count) {
3655 case 1:
3656 style = UTZFMT_STYLE_ISO_BASIC_SHORT;
3657 break;
3658 case 2:
3659 style = UTZFMT_STYLE_ISO_BASIC_FIXED;
3660 break;
3661 case 3:
3662 style = UTZFMT_STYLE_ISO_EXTENDED_FIXED;
3663 break;
3664 case 4:
3665 style = UTZFMT_STYLE_ISO_BASIC_FULL;
3666 break;
3667 default:
3668 style = UTZFMT_STYLE_ISO_EXTENDED_FULL;
3669 break;
3670 }
3671 const TimeZoneFormat *tzfmt = tzFormat(status);
3672 if (U_SUCCESS(status)) {
3673 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3674 if (tz != nullptr) {
3675 cal.adoptTimeZone(tz);
3676 return pos.getIndex();
3677 }
3678 }
3679 return -start;
3680 }
3681 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
3682 {
3683 UTimeZoneFormatStyle style;
3684 switch (count) {
3685 case 1:
3686 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT;
3687 break;
3688 case 2:
3689 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED;
3690 break;
3691 case 3:
3692 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED;
3693 break;
3694 case 4:
3695 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL;
3696 break;
3697 default:
3698 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL;
3699 break;
3700 }
3701 const TimeZoneFormat *tzfmt = tzFormat(status);
3702 if (U_SUCCESS(status)) {
3703 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3704 if (tz != nullptr) {
3705 cal.adoptTimeZone(tz);
3706 return pos.getIndex();
3707 }
3708 }
3709 return -start;
3710 }
3711 // currently no pattern character is defined for UDAT_TIME_SEPARATOR_FIELD
3712 // so we should not get here. Leave support in for future definition.
3713 case UDAT_TIME_SEPARATOR_FIELD:
3714 {
3715 static const char16_t def_sep = DateFormatSymbols::DEFAULT_TIME_SEPARATOR;
3716 static const char16_t alt_sep = DateFormatSymbols::ALTERNATE_TIME_SEPARATOR;
3717
3718 // Try matching a time separator.
3719 int32_t count_sep = 1;
3720 UnicodeString data[3];
3721 fSymbols->getTimeSeparatorString(data[0]);
3722
3723 // Add the default, if different from the locale.
3724 if (data[0].compare(&def_sep, 1) != 0) {
3725 data[count_sep++].setTo(def_sep);
3726 }
3727
3728 // If lenient, add also the alternate, if different from the locale.
3729 if (isLenient() && data[0].compare(&alt_sep, 1) != 0) {
3730 data[count_sep++].setTo(alt_sep);
3731 }
3732
3733 return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count_sep, nullptr, cal);
3734 }
3735
3736 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
3737 {
3738 U_ASSERT(dayPeriod != nullptr);
3739 int32_t ampmStart = subParse(text, start, 0x61, count,
3740 obeyCount, allowNegative, ambiguousYear, saveHebrewMonth, cal,
3741 patLoc, numericLeapMonthFormatter, tzTimeType);
3742
3743 if (ampmStart > 0) {
3744 return ampmStart;
3745 } else {
3746 int32_t newStart = 0;
3747
3748 // Only match the first two strings from the day period strings array.
3749 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3750 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3751 2, *dayPeriod)) > 0) {
3752 return newStart;
3753 }
3754 }
3755 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3756 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3757 2, *dayPeriod)) > 0) {
3758 return newStart;
3759 }
3760 }
3761 // count == 4, but allow other counts
3762 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) {
3763 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3764 2, *dayPeriod)) > 0) {
3765 return newStart;
3766 }
3767 }
3768
3769 return -start;
3770 }
3771 }
3772
3773 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
3774 {
3775 U_ASSERT(dayPeriod != nullptr);
3776 int32_t newStart = 0;
3777
3778 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3779 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3780 fSymbols->fAbbreviatedDayPeriodsCount, *dayPeriod)) > 0) {
3781 return newStart;
3782 }
3783 }
3784 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3785 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3786 fSymbols->fNarrowDayPeriodsCount, *dayPeriod)) > 0) {
3787 return newStart;
3788 }
3789 }
3790 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3791 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3792 fSymbols->fWideDayPeriodsCount, *dayPeriod)) > 0) {
3793 return newStart;
3794 }
3795 }
3796
3797 return -start;
3798 }
3799
3800 default:
3801 // Handle "generic" fields
3802 // this is now handled below, outside the switch block
3803 break;
3804 }
3805 // Handle "generic" fields:
3806 // switch default case now handled here (outside switch block) to allow
3807 // parsing of some string fields as digits for lenient case
3808
3809 int32_t parseStart = pos.getIndex();
3810 const UnicodeString* src;
3811 if (obeyCount) {
3812 if ((start+count) > text.length()) {
3813 return -start;
3814 }
3815 text.extractBetween(0, start + count, temp);
3816 src = &temp;
3817 } else {
3818 src = &text;
3819 }
3820 parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3821 if (obeyCount && !isLenient() && pos.getIndex() < start + count) {
3822 return -start;
3823 }
3824 if (pos.getIndex() != parseStart) {
3825 int32_t val = number.getLong();
3826
3827 // Don't need suffix processing here (as in number processing at the beginning of the function);
3828 // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
3829
3830 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) {
3831 // Check the range of the value
3832 int32_t bias = gFieldRangeBias[patternCharIndex];
3833 if (bias >= 0 && (val > cal.getMaximum(field) + bias || val < cal.getMinimum(field) + bias)) {
3834 return -start;
3835 }
3836 }
3837
3838 // For the following, need to repeat some of the "if (gotNumber)" code above:
3839 // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD,
3840 // UDAT_[STANDALONE_]QUARTER_FIELD
3841 switch (patternCharIndex) {
3842 case UDAT_MONTH_FIELD:
3843 // See notes under UDAT_MONTH_FIELD case above
3844 if (!strcmp(cal.getType(),"hebrew")) {
3845 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3846 if (cal.isSet(UCAL_YEAR)) {
3847 UErrorCode monthStatus = U_ZERO_ERROR;
3848 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && val >= 6) {
3849 cal.set(UCAL_MONTH, val);
3850 } else {
3851 cal.set(UCAL_MONTH, val - 1);
3852 }
3853 } else {
3854 saveHebrewMonth = val;
3855 }
3856 } else {
3857 cal.set(UCAL_MONTH, val - 1);
3858 }
3859 break;
3860 case UDAT_STANDALONE_MONTH_FIELD:
3861 cal.set(UCAL_MONTH, val - 1);
3862 break;
3863 case UDAT_DOW_LOCAL_FIELD:
3864 case UDAT_STANDALONE_DAY_FIELD:
3865 cal.set(UCAL_DOW_LOCAL, val);
3866 break;
3867 case UDAT_QUARTER_FIELD:
3868 case UDAT_STANDALONE_QUARTER_FIELD:
3869 cal.set(UCAL_MONTH, (val - 1) * 3);
3870 break;
3871 case UDAT_RELATED_YEAR_FIELD:
3872 cal.setRelatedYear(val);
3873 break;
3874 default:
3875 cal.set(field, val);
3876 break;
3877 }
3878 return pos.getIndex();
3879 }
3880 return -start;
3881 }
3882
3883 /**
3884 * Parse an integer using fNumberFormat. This method is semantically
3885 * const, but actually may modify fNumberFormat.
3886 */
parseInt(const UnicodeString & text,Formattable & number,ParsePosition & pos,UBool allowNegative,const NumberFormat * fmt) const3887 void SimpleDateFormat::parseInt(const UnicodeString& text,
3888 Formattable& number,
3889 ParsePosition& pos,
3890 UBool allowNegative,
3891 const NumberFormat *fmt) const {
3892 parseInt(text, number, -1, pos, allowNegative,fmt);
3893 }
3894
3895 /**
3896 * Parse an integer using fNumberFormat up to maxDigits.
3897 */
parseInt(const UnicodeString & text,Formattable & number,int32_t maxDigits,ParsePosition & pos,UBool allowNegative,const NumberFormat * fmt) const3898 void SimpleDateFormat::parseInt(const UnicodeString& text,
3899 Formattable& number,
3900 int32_t maxDigits,
3901 ParsePosition& pos,
3902 UBool allowNegative,
3903 const NumberFormat *fmt) const {
3904 UnicodeString oldPrefix;
3905 auto* fmtAsDF = dynamic_cast<const DecimalFormat*>(fmt);
3906 LocalPointer<DecimalFormat> df;
3907 if (!allowNegative && fmtAsDF != nullptr) {
3908 df.adoptInstead(fmtAsDF->clone());
3909 if (df.isNull()) {
3910 // Memory allocation error
3911 return;
3912 }
3913 df->setNegativePrefix(UnicodeString(true, SUPPRESS_NEGATIVE_PREFIX, -1));
3914 fmt = df.getAlias();
3915 }
3916 int32_t oldPos = pos.getIndex();
3917 fmt->parse(text, number, pos);
3918
3919 if (maxDigits > 0) {
3920 // adjust the result to fit into
3921 // the maxDigits and move the position back
3922 int32_t nDigits = pos.getIndex() - oldPos;
3923 if (nDigits > maxDigits) {
3924 int32_t val = number.getLong();
3925 nDigits -= maxDigits;
3926 while (nDigits > 0) {
3927 val /= 10;
3928 nDigits--;
3929 }
3930 pos.setIndex(oldPos + maxDigits);
3931 number.setLong(val);
3932 }
3933 }
3934 }
3935
countDigits(const UnicodeString & text,int32_t start,int32_t end) const3936 int32_t SimpleDateFormat::countDigits(const UnicodeString& text, int32_t start, int32_t end) const {
3937 int32_t numDigits = 0;
3938 int32_t idx = start;
3939 while (idx < end) {
3940 UChar32 cp = text.char32At(idx);
3941 if (u_isdigit(cp)) {
3942 numDigits++;
3943 }
3944 idx += U16_LENGTH(cp);
3945 }
3946 return numDigits;
3947 }
3948
3949 //----------------------------------------------------------------------
3950
translatePattern(const UnicodeString & originalPattern,UnicodeString & translatedPattern,const UnicodeString & from,const UnicodeString & to,UErrorCode & status)3951 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
3952 UnicodeString& translatedPattern,
3953 const UnicodeString& from,
3954 const UnicodeString& to,
3955 UErrorCode& status)
3956 {
3957 // run through the pattern and convert any pattern symbols from the version
3958 // in "from" to the corresponding character in "to". This code takes
3959 // quoted strings into account (it doesn't try to translate them), and it signals
3960 // an error if a particular "pattern character" doesn't appear in "from".
3961 // Depending on the values of "from" and "to" this can convert from generic
3962 // to localized patterns or localized to generic.
3963 if (U_FAILURE(status)) {
3964 return;
3965 }
3966
3967 translatedPattern.remove();
3968 UBool inQuote = false;
3969 for (int32_t i = 0; i < originalPattern.length(); ++i) {
3970 char16_t c = originalPattern[i];
3971 if (inQuote) {
3972 if (c == QUOTE) {
3973 inQuote = false;
3974 }
3975 } else {
3976 if (c == QUOTE) {
3977 inQuote = true;
3978 } else if (isSyntaxChar(c)) {
3979 int32_t ci = from.indexOf(c);
3980 if (ci == -1) {
3981 status = U_INVALID_FORMAT_ERROR;
3982 return;
3983 }
3984 c = to[ci];
3985 }
3986 }
3987 translatedPattern += c;
3988 }
3989 if (inQuote) {
3990 status = U_INVALID_FORMAT_ERROR;
3991 return;
3992 }
3993 }
3994
3995 //----------------------------------------------------------------------
3996
3997 UnicodeString&
toPattern(UnicodeString & result) const3998 SimpleDateFormat::toPattern(UnicodeString& result) const
3999 {
4000 result = fPattern;
4001 return result;
4002 }
4003
4004 //----------------------------------------------------------------------
4005
4006 UnicodeString&
toLocalizedPattern(UnicodeString & result,UErrorCode & status) const4007 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
4008 UErrorCode& status) const
4009 {
4010 translatePattern(fPattern, result,
4011 UnicodeString(DateFormatSymbols::getPatternUChars()),
4012 fSymbols->fLocalPatternChars, status);
4013 return result;
4014 }
4015
4016 //----------------------------------------------------------------------
4017
4018 void
applyPattern(const UnicodeString & pattern)4019 SimpleDateFormat::applyPattern(const UnicodeString& pattern)
4020 {
4021 fPattern = pattern;
4022 parsePattern();
4023
4024 // Hack to update use of Gannen year numbering for ja@calendar=japanese -
4025 // use only if format is non-numeric (includes 年) and no other fDateOverride.
4026 if (fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
4027 uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
4028 if (fDateOverride==UnicodeString(u"y=jpanyear") && !fHasHanYearChar) {
4029 // Gannen numbering is set but new pattern should not use it, unset;
4030 // use procedure from adoptNumberFormat to clear overrides
4031 if (fSharedNumberFormatters) {
4032 freeSharedNumberFormatters(fSharedNumberFormatters);
4033 fSharedNumberFormatters = nullptr;
4034 }
4035 fDateOverride.setToBogus(); // record status
4036 } else if (fDateOverride.isBogus() && fHasHanYearChar) {
4037 // No current override (=> no Gannen numbering) but new pattern needs it;
4038 // use procedures from initNUmberFormatters / adoptNumberFormat
4039 umtx_lock(&LOCK);
4040 if (fSharedNumberFormatters == nullptr) {
4041 fSharedNumberFormatters = allocSharedNumberFormatters();
4042 }
4043 umtx_unlock(&LOCK);
4044 if (fSharedNumberFormatters != nullptr) {
4045 Locale ovrLoc(fLocale.getLanguage(),fLocale.getCountry(),fLocale.getVariant(),"numbers=jpanyear");
4046 UErrorCode status = U_ZERO_ERROR;
4047 const SharedNumberFormat *snf = createSharedNumberFormat(ovrLoc, status);
4048 if (U_SUCCESS(status)) {
4049 // Now that we have an appropriate number formatter, fill in the
4050 // appropriate slot in the number formatters table.
4051 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(u'y');
4052 SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
4053 snf->deleteIfZeroRefCount();
4054 fDateOverride.setTo(u"y=jpanyear", -1); // record status
4055 }
4056 }
4057 }
4058 }
4059 }
4060
4061 //----------------------------------------------------------------------
4062
4063 void
applyLocalizedPattern(const UnicodeString & pattern,UErrorCode & status)4064 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
4065 UErrorCode &status)
4066 {
4067 translatePattern(pattern, fPattern,
4068 fSymbols->fLocalPatternChars,
4069 UnicodeString(DateFormatSymbols::getPatternUChars()), status);
4070 }
4071
4072 //----------------------------------------------------------------------
4073
4074 const DateFormatSymbols*
getDateFormatSymbols() const4075 SimpleDateFormat::getDateFormatSymbols() const
4076 {
4077 return fSymbols;
4078 }
4079
4080 //----------------------------------------------------------------------
4081
4082 void
adoptDateFormatSymbols(DateFormatSymbols * newFormatSymbols)4083 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
4084 {
4085 delete fSymbols;
4086 fSymbols = newFormatSymbols;
4087 }
4088
4089 //----------------------------------------------------------------------
4090 void
setDateFormatSymbols(const DateFormatSymbols & newFormatSymbols)4091 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
4092 {
4093 delete fSymbols;
4094 fSymbols = new DateFormatSymbols(newFormatSymbols);
4095 }
4096
4097 //----------------------------------------------------------------------
4098 const TimeZoneFormat*
getTimeZoneFormat() const4099 SimpleDateFormat::getTimeZoneFormat() const {
4100 // TimeZoneFormat initialization might fail when out of memory.
4101 // If we always initialize TimeZoneFormat instance, we can return
4102 // such status there. For now, this implementation lazily instantiates
4103 // a TimeZoneFormat for performance optimization reasons, but cannot
4104 // propagate such error (probably just out of memory case) to the caller.
4105 UErrorCode status = U_ZERO_ERROR;
4106 return (const TimeZoneFormat*)tzFormat(status);
4107 }
4108
4109 //----------------------------------------------------------------------
4110 void
adoptTimeZoneFormat(TimeZoneFormat * timeZoneFormatToAdopt)4111 SimpleDateFormat::adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt)
4112 {
4113 delete fTimeZoneFormat;
4114 fTimeZoneFormat = timeZoneFormatToAdopt;
4115 }
4116
4117 //----------------------------------------------------------------------
4118 void
setTimeZoneFormat(const TimeZoneFormat & newTimeZoneFormat)4119 SimpleDateFormat::setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat)
4120 {
4121 delete fTimeZoneFormat;
4122 fTimeZoneFormat = new TimeZoneFormat(newTimeZoneFormat);
4123 }
4124
4125 //----------------------------------------------------------------------
4126
4127
adoptCalendar(Calendar * calendarToAdopt)4128 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
4129 {
4130 UErrorCode status = U_ZERO_ERROR;
4131 Locale calLocale(fLocale);
4132 calLocale.setKeywordValue("calendar", calendarToAdopt->getType(), status);
4133 DateFormatSymbols *newSymbols =
4134 DateFormatSymbols::createForLocale(calLocale, status);
4135 if (U_FAILURE(status)) {
4136 delete calendarToAdopt;
4137 return;
4138 }
4139 DateFormat::adoptCalendar(calendarToAdopt);
4140 delete fSymbols;
4141 fSymbols = newSymbols;
4142 initializeDefaultCentury(); // we need a new century (possibly)
4143 }
4144
4145
4146 //----------------------------------------------------------------------
4147
4148
4149 // override the DateFormat implementation in order to
4150 // lazily initialize fCapitalizationBrkIter
4151 void
setContext(UDisplayContext value,UErrorCode & status)4152 SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status)
4153 {
4154 DateFormat::setContext(value, status);
4155 #if !UCONFIG_NO_BREAK_ITERATION
4156 if (U_SUCCESS(status)) {
4157 if ( fCapitalizationBrkIter == nullptr && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
4158 value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE) ) {
4159 status = U_ZERO_ERROR;
4160 fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status);
4161 if (U_FAILURE(status)) {
4162 delete fCapitalizationBrkIter;
4163 fCapitalizationBrkIter = nullptr;
4164 }
4165 }
4166 }
4167 #endif
4168 }
4169
4170
4171 //----------------------------------------------------------------------
4172
4173
4174 UBool
isFieldUnitIgnored(UCalendarDateFields field) const4175 SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
4176 return isFieldUnitIgnored(fPattern, field);
4177 }
4178
4179
4180 UBool
isFieldUnitIgnored(const UnicodeString & pattern,UCalendarDateFields field)4181 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
4182 UCalendarDateFields field) {
4183 int32_t fieldLevel = fgCalendarFieldToLevel[field];
4184 int32_t level;
4185 char16_t ch;
4186 UBool inQuote = false;
4187 char16_t prevCh = 0;
4188 int32_t count = 0;
4189
4190 for (int32_t i = 0; i < pattern.length(); ++i) {
4191 ch = pattern[i];
4192 if (ch != prevCh && count > 0) {
4193 level = getLevelFromChar(prevCh);
4194 // the larger the level, the smaller the field unit.
4195 if (fieldLevel <= level) {
4196 return false;
4197 }
4198 count = 0;
4199 }
4200 if (ch == QUOTE) {
4201 if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
4202 ++i;
4203 } else {
4204 inQuote = ! inQuote;
4205 }
4206 }
4207 else if (!inQuote && isSyntaxChar(ch)) {
4208 prevCh = ch;
4209 ++count;
4210 }
4211 }
4212 if (count > 0) {
4213 // last item
4214 level = getLevelFromChar(prevCh);
4215 if (fieldLevel <= level) {
4216 return false;
4217 }
4218 }
4219 return true;
4220 }
4221
4222 //----------------------------------------------------------------------
4223
4224 const Locale&
getSmpFmtLocale() const4225 SimpleDateFormat::getSmpFmtLocale() const {
4226 return fLocale;
4227 }
4228
4229 //----------------------------------------------------------------------
4230
4231 int32_t
checkIntSuffix(const UnicodeString & text,int32_t start,int32_t patLoc,UBool isNegative) const4232 SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start,
4233 int32_t patLoc, UBool isNegative) const {
4234 // local variables
4235 UnicodeString suf;
4236 int32_t patternMatch;
4237 int32_t textPreMatch;
4238 int32_t textPostMatch;
4239
4240 // check that we are still in range
4241 if ( (start > text.length()) ||
4242 (start < 0) ||
4243 (patLoc < 0) ||
4244 (patLoc > fPattern.length())) {
4245 // out of range, don't advance location in text
4246 return start;
4247 }
4248
4249 // get the suffix
4250 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
4251 if (decfmt != nullptr) {
4252 if (isNegative) {
4253 suf = decfmt->getNegativeSuffix(suf);
4254 }
4255 else {
4256 suf = decfmt->getPositiveSuffix(suf);
4257 }
4258 }
4259
4260 // check for suffix
4261 if (suf.length() <= 0) {
4262 return start;
4263 }
4264
4265 // check suffix will be encountered in the pattern
4266 patternMatch = compareSimpleAffix(suf,fPattern,patLoc);
4267
4268 // check if a suffix will be encountered in the text
4269 textPreMatch = compareSimpleAffix(suf,text,start);
4270
4271 // check if a suffix was encountered in the text
4272 textPostMatch = compareSimpleAffix(suf,text,start-suf.length());
4273
4274 // check for suffix match
4275 if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) {
4276 return start;
4277 }
4278 else if ((textPostMatch >= 0) && (patternMatch >= 0) && (textPostMatch == patternMatch)) {
4279 return start - suf.length();
4280 }
4281
4282 // should not get here
4283 return start;
4284 }
4285
4286 //----------------------------------------------------------------------
4287
4288 int32_t
compareSimpleAffix(const UnicodeString & affix,const UnicodeString & input,int32_t pos) const4289 SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
4290 const UnicodeString& input,
4291 int32_t pos) const {
4292 int32_t start = pos;
4293 for (int32_t i=0; i<affix.length(); ) {
4294 UChar32 c = affix.char32At(i);
4295 int32_t len = U16_LENGTH(c);
4296 if (PatternProps::isWhiteSpace(c)) {
4297 // We may have a pattern like: \u200F \u0020
4298 // and input text like: \u200F \u0020
4299 // Note that U+200F and U+0020 are Pattern_White_Space but only
4300 // U+0020 is UWhiteSpace. So we have to first do a direct
4301 // match of the run of Pattern_White_Space in the pattern,
4302 // then match any extra characters.
4303 UBool literalMatch = false;
4304 while (pos < input.length() &&
4305 input.char32At(pos) == c) {
4306 literalMatch = true;
4307 i += len;
4308 pos += len;
4309 if (i == affix.length()) {
4310 break;
4311 }
4312 c = affix.char32At(i);
4313 len = U16_LENGTH(c);
4314 if (!PatternProps::isWhiteSpace(c)) {
4315 break;
4316 }
4317 }
4318
4319 // Advance over run in pattern
4320 i = skipPatternWhiteSpace(affix, i);
4321
4322 // Advance over run in input text
4323 // Must see at least one white space char in input,
4324 // unless we've already matched some characters literally.
4325 int32_t s = pos;
4326 pos = skipUWhiteSpace(input, pos);
4327 if (pos == s && !literalMatch) {
4328 return -1;
4329 }
4330
4331 // If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
4332 // Otherwise, the previous lines may have skipped over text (such as U+00A0) that
4333 // is also in the affix.
4334 i = skipUWhiteSpace(affix, i);
4335 } else {
4336 if (pos < input.length() &&
4337 input.char32At(pos) == c) {
4338 i += len;
4339 pos += len;
4340 } else {
4341 return -1;
4342 }
4343 }
4344 }
4345 return pos - start;
4346 }
4347
4348 //----------------------------------------------------------------------
4349
4350 int32_t
skipPatternWhiteSpace(const UnicodeString & text,int32_t pos) const4351 SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const {
4352 const char16_t* s = text.getBuffer();
4353 return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s);
4354 }
4355
4356 //----------------------------------------------------------------------
4357
4358 int32_t
skipUWhiteSpace(const UnicodeString & text,int32_t pos) const4359 SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const {
4360 while (pos < text.length()) {
4361 UChar32 c = text.char32At(pos);
4362 if (!u_isUWhiteSpace(c)) {
4363 break;
4364 }
4365 pos += U16_LENGTH(c);
4366 }
4367 return pos;
4368 }
4369
4370 //----------------------------------------------------------------------
4371
4372 // Lazy TimeZoneFormat instantiation, semantically const.
4373 TimeZoneFormat *
tzFormat(UErrorCode & status) const4374 SimpleDateFormat::tzFormat(UErrorCode &status) const {
4375 Mutex m(&LOCK);
4376 if (fTimeZoneFormat == nullptr && U_SUCCESS(status)) {
4377 const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat =
4378 TimeZoneFormat::createInstance(fLocale, status);
4379 }
4380 return fTimeZoneFormat;
4381 }
4382
parsePattern()4383 void SimpleDateFormat::parsePattern() {
4384 fHasMinute = false;
4385 fHasSecond = false;
4386 fHasHanYearChar = false;
4387
4388 int len = fPattern.length();
4389 UBool inQuote = false;
4390 for (int32_t i = 0; i < len; ++i) {
4391 char16_t ch = fPattern[i];
4392 if (ch == QUOTE) {
4393 inQuote = !inQuote;
4394 }
4395 if (ch == 0x5E74) { // don't care whether this is inside quotes
4396 fHasHanYearChar = true;
4397 }
4398 if (!inQuote) {
4399 if (ch == 0x6D) { // 0x6D == 'm'
4400 fHasMinute = true;
4401 }
4402 if (ch == 0x73) { // 0x73 == 's'
4403 fHasSecond = true;
4404 }
4405 }
4406 }
4407 }
4408
4409 U_NAMESPACE_END
4410
4411 #endif /* #if !UCONFIG_NO_FORMATTING */
4412
4413 //eof
4414