1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2016, International Business Machines Corporation and *
6 * others. All Rights Reserved. *
7 *******************************************************************************
8 *
9 * File SMPDTFMT.CPP
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 02/19/97 aliu Converted from java.
15 * 03/31/97 aliu Modified extensively to work with 50 locales.
16 * 04/01/97 aliu Added support for centuries.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 07/21/98 stephen Added initializeDefaultCentury.
19 * Removed getZoneIndex (added in DateFormatSymbols)
20 * Removed subParseLong
21 * Removed chk
22 * 02/22/99 stephen Removed character literals for EBCDIC safety
23 * 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru
24 * "99" are recognized. {j28 4182066}
25 * 11/15/99 weiv Added support for week of year/day of week format
26 ********************************************************************************
27 */
28
29 #define ZID_KEY_MAX 128
30
31 #include "unicode/utypes.h"
32
33 #if !UCONFIG_NO_FORMATTING
34 #include "unicode/smpdtfmt.h"
35 #include "unicode/dtfmtsym.h"
36 #include "unicode/ures.h"
37 #include "unicode/msgfmt.h"
38 #include "unicode/calendar.h"
39 #include "unicode/gregocal.h"
40 #include "unicode/timezone.h"
41 #include "unicode/decimfmt.h"
42 #include "unicode/dcfmtsym.h"
43 #include "unicode/uchar.h"
44 #include "unicode/uniset.h"
45 #include "unicode/ustring.h"
46 #include "unicode/basictz.h"
47 #include "unicode/simpleformatter.h"
48 #include "unicode/simplenumberformatter.h"
49 #include "unicode/simpletz.h"
50 #include "unicode/rbtz.h"
51 #include "unicode/tzfmt.h"
52 #include "unicode/ucasemap.h"
53 #include "unicode/utf16.h"
54 #include "unicode/vtzone.h"
55 #include "unicode/udisplaycontext.h"
56 #include "unicode/brkiter.h"
57 #include "unicode/rbnf.h"
58 #include "unicode/dtptngen.h"
59 #include "uresimp.h"
60 #include "olsontz.h"
61 #include "patternprops.h"
62 #include "fphdlimp.h"
63 #include "hebrwcal.h"
64 #include "cstring.h"
65 #include "uassert.h"
66 #include "cmemory.h"
67 #include "umutex.h"
68 #include "mutex.h"
69 #include <float.h>
70 #include "smpdtfst.h"
71 #include "sharednumberformat.h"
72 #include "ucasemap_imp.h"
73 #include "ustr_imp.h"
74 #include "charstr.h"
75 #include "uvector.h"
76 #include "cstr.h"
77 #include "dayperiodrules.h"
78 #include "tznames_impl.h" // ZONE_NAME_U16_MAX
79 #include "number_utypes.h"
80
81 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
82 #include <stdio.h>
83 #endif
84
85 // *****************************************************************************
86 // class SimpleDateFormat
87 // *****************************************************************************
88
89 U_NAMESPACE_BEGIN
90
91 /**
92 * Last-resort string to use for "GMT" when constructing time zone strings.
93 */
94 // For time zones that have no names, use strings GMT+minutes and
95 // GMT-minutes. For instance, in France the time zone is GMT+60.
96 // Also accepted are GMT+H:MM or GMT-H:MM.
97 // Currently not being used
98 //static const char16_t gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT"
99 //static const char16_t gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
100 //static const char16_t gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
101 //static const char16_t gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
102 //static const char16_t gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
103 //static const char16_t gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
104 //static const char16_t gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
105 //static const char16_t gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
106 //static const char16_t gUt[] = {0x0055, 0x0054, 0x0000}; // "UT"
//static const char16_t gUtc[] = {0x0055, 0x0054, 0x0043, 0x0000}; // "UTC"
108
109 typedef enum GmtPatSize {
110 kGmtLen = 3,
111 kGmtPatLen = 6,
112 kNegHmsLen = 9,
113 kNegHmLen = 6,
114 kPosHmsLen = 9,
115 kPosHmLen = 6,
116 kUtLen = 2,
117 kUtcLen = 3
118 } GmtPatSize;
119
120 // Stuff needed for numbering system overrides
121
122 typedef enum OvrStrType {
123 kOvrStrDate = 0,
124 kOvrStrTime = 1,
125 kOvrStrBoth = 2
126 } OvrStrType;
127
128 static const UDateFormatField kDateFields[] = {
129 UDAT_YEAR_FIELD,
130 UDAT_MONTH_FIELD,
131 UDAT_DATE_FIELD,
132 UDAT_DAY_OF_YEAR_FIELD,
133 UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
134 UDAT_WEEK_OF_YEAR_FIELD,
135 UDAT_WEEK_OF_MONTH_FIELD,
136 UDAT_YEAR_WOY_FIELD,
137 UDAT_EXTENDED_YEAR_FIELD,
138 UDAT_JULIAN_DAY_FIELD,
139 UDAT_STANDALONE_DAY_FIELD,
140 UDAT_STANDALONE_MONTH_FIELD,
141 UDAT_QUARTER_FIELD,
142 UDAT_STANDALONE_QUARTER_FIELD,
143 UDAT_YEAR_NAME_FIELD,
144 UDAT_RELATED_YEAR_FIELD };
145 static const int8_t kDateFieldsCount = 16;
146
147 static const UDateFormatField kTimeFields[] = {
148 UDAT_HOUR_OF_DAY1_FIELD,
149 UDAT_HOUR_OF_DAY0_FIELD,
150 UDAT_MINUTE_FIELD,
151 UDAT_SECOND_FIELD,
152 UDAT_FRACTIONAL_SECOND_FIELD,
153 UDAT_HOUR1_FIELD,
154 UDAT_HOUR0_FIELD,
155 UDAT_MILLISECONDS_IN_DAY_FIELD,
156 UDAT_TIMEZONE_RFC_FIELD,
157 UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD };
158 static const int8_t kTimeFieldsCount = 10;
159
160
161 // This is a pattern-of-last-resort used when we can't load a usable pattern out
162 // of a resource.
163 static const char16_t gDefaultPattern[] =
164 {
165 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
166 }; /* "yMMdd hh:mm a" */
167
168 // This prefix is designed to NEVER MATCH real text, in order to
169 // suppress the parsing of negative numbers. Adjust as needed (if
170 // this becomes valid Unicode).
171 static const char16_t SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};
172
173 /**
174 * These are the tags we expect to see in normal resource bundle files associated
175 * with a locale.
176 */
177 static const char16_t QUOTE = 0x27; // Single quote
178
179 /*
180 * The field range check bias for each UDateFormatField.
181 * The bias is added to the minimum and maximum values
182 * before they are compared to the parsed number.
183 * For example, the calendar stores zero-based month numbers
184 * but the parsed month numbers start at 1, so the bias is 1.
185 *
186 * A value of -1 means that the value is not checked.
187 */
188 static const int32_t gFieldRangeBias[] = {
189 -1, // 'G' - UDAT_ERA_FIELD
190 -1, // 'y' - UDAT_YEAR_FIELD
191 1, // 'M' - UDAT_MONTH_FIELD
192 0, // 'd' - UDAT_DATE_FIELD
193 -1, // 'k' - UDAT_HOUR_OF_DAY1_FIELD
194 -1, // 'H' - UDAT_HOUR_OF_DAY0_FIELD
195 0, // 'm' - UDAT_MINUTE_FIELD
196 0, // 's' - UDAT_SECOND_FIELD
197 -1, // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
198 -1, // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
199 -1, // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
200 -1, // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
201 -1, // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
202 -1, // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
203 -1, // 'a' - UDAT_AM_PM_FIELD
204 -1, // 'h' - UDAT_HOUR1_FIELD
205 -1, // 'K' - UDAT_HOUR0_FIELD
206 -1, // 'z' - UDAT_TIMEZONE_FIELD
207 -1, // 'Y' - UDAT_YEAR_WOY_FIELD
208 -1, // 'e' - UDAT_DOW_LOCAL_FIELD
209 -1, // 'u' - UDAT_EXTENDED_YEAR_FIELD
210 -1, // 'g' - UDAT_JULIAN_DAY_FIELD
211 -1, // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
212 -1, // 'Z' - UDAT_TIMEZONE_RFC_FIELD
213 -1, // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
214 0, // 'c' - UDAT_STANDALONE_DAY_FIELD
215 1, // 'L' - UDAT_STANDALONE_MONTH_FIELD
216 -1, // 'Q' - UDAT_QUARTER_FIELD (1-4?)
217 -1, // 'q' - UDAT_STANDALONE_QUARTER_FIELD
218 -1, // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
219 -1, // 'U' - UDAT_YEAR_NAME_FIELD
220 -1, // 'O' - UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD
221 -1, // 'X' - UDAT_TIMEZONE_ISO_FIELD
222 -1, // 'x' - UDAT_TIMEZONE_ISO_LOCAL_FIELD
223 -1, // 'r' - UDAT_RELATED_YEAR_FIELD
224 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
225 -1, // ':' - UDAT_TIME_SEPARATOR_FIELD
226 #else
227 -1, // (no pattern character currently) - UDAT_TIME_SEPARATOR_FIELD
228 #endif
229 };
230
231 // When calendar uses hebr numbering (i.e. he@calendar=hebrew),
232 // offset the years within the current millennium down to 1-999
233 static const int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000;
234 static const int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR = 6000;
235
236 /**
237 * Maximum range for detecting daylight offset of a time zone when parsed time zone
238 * string indicates it's daylight saving time, but the detected time zone does not
239 * observe daylight saving time at the parsed date.
240 */
241 static const double MAX_DAYLIGHT_DETECTION_RANGE = 30*365*24*60*60*1000.0;
242
243 static UMutex LOCK;
244
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)245 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
246
247 SimpleDateFormat::NSOverride::~NSOverride() {
248 if (snf != nullptr) {
249 snf->removeRef();
250 }
251 }
252
253
free()254 void SimpleDateFormat::NSOverride::free() {
255 NSOverride *cur = this;
256 while (cur) {
257 NSOverride *next_temp = cur->next;
258 delete cur;
259 cur = next_temp;
260 }
261 }
262
263 // no matter what the locale's default number format looked like, we want
264 // to modify it so that it doesn't use thousands separators, doesn't always
265 // show the decimal point, and recognizes integers only when parsing
fixNumberFormatForDates(NumberFormat & nf)266 static void fixNumberFormatForDates(NumberFormat &nf) {
267 nf.setGroupingUsed(false);
268 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(&nf);
269 if (decfmt != nullptr) {
270 decfmt->setDecimalSeparatorAlwaysShown(false);
271 }
272 nf.setParseIntegerOnly(true);
273 nf.setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
274 }
275
createSharedNumberFormat(NumberFormat * nfToAdopt)276 static const SharedNumberFormat *createSharedNumberFormat(
277 NumberFormat *nfToAdopt) {
278 fixNumberFormatForDates(*nfToAdopt);
279 const SharedNumberFormat *result = new SharedNumberFormat(nfToAdopt);
280 if (result == nullptr) {
281 delete nfToAdopt;
282 }
283 return result;
284 }
285
createSharedNumberFormat(const Locale & loc,UErrorCode & status)286 static const SharedNumberFormat *createSharedNumberFormat(
287 const Locale &loc, UErrorCode &status) {
288 NumberFormat *nf = NumberFormat::createInstance(loc, status);
289 if (U_FAILURE(status)) {
290 return nullptr;
291 }
292 const SharedNumberFormat *result = createSharedNumberFormat(nf);
293 if (result == nullptr) {
294 status = U_MEMORY_ALLOCATION_ERROR;
295 }
296 return result;
297 }
298
allocSharedNumberFormatters()299 static const SharedNumberFormat **allocSharedNumberFormatters() {
300 const SharedNumberFormat **result = (const SharedNumberFormat**)
301 uprv_malloc(UDAT_FIELD_COUNT * sizeof(const SharedNumberFormat*));
302 if (result == nullptr) {
303 return nullptr;
304 }
305 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
306 result[i] = nullptr;
307 }
308 return result;
309 }
310
freeSharedNumberFormatters(const SharedNumberFormat ** list)311 static void freeSharedNumberFormatters(const SharedNumberFormat ** list) {
312 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
313 SharedObject::clearPtr(list[i]);
314 }
315 uprv_free(list);
316 }
317
getNumberFormatByIndex(UDateFormatField index) const318 const NumberFormat *SimpleDateFormat::getNumberFormatByIndex(
319 UDateFormatField index) const {
320 if (fSharedNumberFormatters == nullptr ||
321 fSharedNumberFormatters[index] == nullptr) {
322 return fNumberFormat;
323 }
324 return &(**fSharedNumberFormatters[index]);
325 }
326
327 //----------------------------------------------------------------------
328
~SimpleDateFormat()329 SimpleDateFormat::~SimpleDateFormat()
330 {
331 delete fSymbols;
332 if (fSharedNumberFormatters) {
333 freeSharedNumberFormatters(fSharedNumberFormatters);
334 }
335 delete fTimeZoneFormat;
336 delete fSimpleNumberFormatter;
337
338 #if !UCONFIG_NO_BREAK_ITERATION
339 delete fCapitalizationBrkIter;
340 #endif
341 }
342
343 //----------------------------------------------------------------------
344
SimpleDateFormat(UErrorCode & status)345 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
346 : fLocale(Locale::getDefault())
347 {
348 initializeBooleanAttributes();
349 construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
350 initializeDefaultCentury();
351 }
352
353 //----------------------------------------------------------------------
354
SimpleDateFormat(const UnicodeString & pattern,UErrorCode & status)355 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
356 UErrorCode &status)
357 : fPattern(pattern),
358 fLocale(Locale::getDefault())
359 {
360 fDateOverride.setToBogus();
361 fTimeOverride.setToBogus();
362 initializeBooleanAttributes();
363 initializeCalendar(nullptr,fLocale,status);
364 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
365 initialize(fLocale, status);
366 initializeDefaultCentury();
367
368 }
369 //----------------------------------------------------------------------
370
SimpleDateFormat(const UnicodeString & pattern,const UnicodeString & override,UErrorCode & status)371 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
372 const UnicodeString& override,
373 UErrorCode &status)
374 : fPattern(pattern),
375 fLocale(Locale::getDefault())
376 {
377 fDateOverride.setTo(override);
378 fTimeOverride.setToBogus();
379 initializeBooleanAttributes();
380 initializeCalendar(nullptr,fLocale,status);
381 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
382 initialize(fLocale, status);
383 initializeDefaultCentury();
384
385 processOverrideString(fLocale,override,kOvrStrBoth,status);
386
387 }
388
389 //----------------------------------------------------------------------
390
SimpleDateFormat(const UnicodeString & pattern,const Locale & locale,UErrorCode & status)391 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
392 const Locale& locale,
393 UErrorCode& status)
394 : fPattern(pattern),
395 fLocale(locale)
396 {
397
398 fDateOverride.setToBogus();
399 fTimeOverride.setToBogus();
400 initializeBooleanAttributes();
401
402 initializeCalendar(nullptr,fLocale,status);
403 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
404 initialize(fLocale, status);
405 initializeDefaultCentury();
406 }
407
408 //----------------------------------------------------------------------
409
SimpleDateFormat(const UnicodeString & pattern,const UnicodeString & override,const Locale & locale,UErrorCode & status)410 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
411 const UnicodeString& override,
412 const Locale& locale,
413 UErrorCode& status)
414 : fPattern(pattern),
415 fLocale(locale)
416 {
417
418 fDateOverride.setTo(override);
419 fTimeOverride.setToBogus();
420 initializeBooleanAttributes();
421
422 initializeCalendar(nullptr,fLocale,status);
423 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
424 initialize(fLocale, status);
425 initializeDefaultCentury();
426
427 processOverrideString(locale,override,kOvrStrBoth,status);
428
429 }
430
431 //----------------------------------------------------------------------
432
SimpleDateFormat(const UnicodeString & pattern,DateFormatSymbols * symbolsToAdopt,UErrorCode & status)433 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
434 DateFormatSymbols* symbolsToAdopt,
435 UErrorCode& status)
436 : fPattern(pattern),
437 fLocale(Locale::getDefault()),
438 fSymbols(symbolsToAdopt)
439 {
440
441 fDateOverride.setToBogus();
442 fTimeOverride.setToBogus();
443 initializeBooleanAttributes();
444
445 initializeCalendar(nullptr,fLocale,status);
446 initialize(fLocale, status);
447 initializeDefaultCentury();
448 }
449
450 //----------------------------------------------------------------------
451
SimpleDateFormat(const UnicodeString & pattern,const DateFormatSymbols & symbols,UErrorCode & status)452 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
453 const DateFormatSymbols& symbols,
454 UErrorCode& status)
455 : fPattern(pattern),
456 fLocale(Locale::getDefault()),
457 fSymbols(new DateFormatSymbols(symbols))
458 {
459
460 fDateOverride.setToBogus();
461 fTimeOverride.setToBogus();
462 initializeBooleanAttributes();
463
464 initializeCalendar(nullptr, fLocale, status);
465 initialize(fLocale, status);
466 initializeDefaultCentury();
467 }
468
469 //----------------------------------------------------------------------
470
471 // Not for public consumption; used by DateFormat
SimpleDateFormat(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)472 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
473 EStyle dateStyle,
474 const Locale& locale,
475 UErrorCode& status)
476 : fLocale(locale)
477 {
478 initializeBooleanAttributes();
479 construct(timeStyle, dateStyle, fLocale, status);
480 if(U_SUCCESS(status)) {
481 initializeDefaultCentury();
482 }
483 }
484
485 //----------------------------------------------------------------------
486
487 /**
488 * Not for public consumption; used by DateFormat. This constructor
489 * never fails. If the resource data is not available, it uses the
490 * the last resort symbols.
491 */
SimpleDateFormat(const Locale & locale,UErrorCode & status)492 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
493 UErrorCode& status)
494 : fPattern(gDefaultPattern),
495 fLocale(locale)
496 {
497 if (U_FAILURE(status)) return;
498 initializeBooleanAttributes();
499 initializeCalendar(nullptr, fLocale, status);
500 fSymbols = DateFormatSymbols::createForLocale(fLocale, status);
501 if (U_FAILURE(status))
502 {
503 status = U_ZERO_ERROR;
504 delete fSymbols;
505 // This constructor doesn't fail; it uses last resort data
506 fSymbols = new DateFormatSymbols(status);
507 /* test for nullptr */
508 if (fSymbols == nullptr) {
509 status = U_MEMORY_ALLOCATION_ERROR;
510 return;
511 }
512 }
513
514 fDateOverride.setToBogus();
515 fTimeOverride.setToBogus();
516
517 initialize(fLocale, status);
518 if(U_SUCCESS(status)) {
519 initializeDefaultCentury();
520 }
521 }
522
523 //----------------------------------------------------------------------
524
SimpleDateFormat(const SimpleDateFormat & other)525 SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
526 : DateFormat(other),
527 fLocale(other.fLocale)
528 {
529 initializeBooleanAttributes();
530 *this = other;
531 }
532
533 //----------------------------------------------------------------------
534
operator =(const SimpleDateFormat & other)535 SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
536 {
537 if (this == &other) {
538 return *this;
539 }
540
541 // fSimpleNumberFormatter references fNumberFormatter, delete it
542 // before we call the = operator which may invalidate fNumberFormatter
543 delete fSimpleNumberFormatter;
544 fSimpleNumberFormatter = nullptr;
545
546 DateFormat::operator=(other);
547 fDateOverride = other.fDateOverride;
548 fTimeOverride = other.fTimeOverride;
549
550 delete fSymbols;
551 fSymbols = nullptr;
552
553 if (other.fSymbols)
554 fSymbols = new DateFormatSymbols(*other.fSymbols);
555
556 fDefaultCenturyStart = other.fDefaultCenturyStart;
557 fDefaultCenturyStartYear = other.fDefaultCenturyStartYear;
558 fHaveDefaultCentury = other.fHaveDefaultCentury;
559
560 fPattern = other.fPattern;
561 fHasMinute = other.fHasMinute;
562 fHasSecond = other.fHasSecond;
563
564 fLocale = other.fLocale;
565
566 // TimeZoneFormat can now be set independently via setter.
567 // If it is nullptr, it will be lazily initialized from locale.
568 delete fTimeZoneFormat;
569 fTimeZoneFormat = nullptr;
570 TimeZoneFormat *otherTZFormat;
571 {
572 // Synchronization is required here, when accessing other.fTimeZoneFormat,
573 // because another thread may be concurrently executing other.tzFormat(),
574 // a logically const function that lazily creates other.fTimeZoneFormat.
575 //
576 // Without synchronization, reordered memory writes could allow us
577 // to see a non-null fTimeZoneFormat before the object itself was
578 // fully initialized. In case of a race, it doesn't matter whether
579 // we see a null or a fully initialized other.fTimeZoneFormat,
580 // only that we avoid seeing a partially initialized object.
581 //
582 // Once initialized, no const function can modify fTimeZoneFormat,
583 // meaning that once we have safely grabbed the other.fTimeZoneFormat
584 // pointer, continued synchronization is not required to use it.
585 Mutex m(&LOCK);
586 otherTZFormat = other.fTimeZoneFormat;
587 }
588 if (otherTZFormat) {
589 fTimeZoneFormat = new TimeZoneFormat(*otherTZFormat);
590 }
591
592 #if !UCONFIG_NO_BREAK_ITERATION
593 if (other.fCapitalizationBrkIter != nullptr) {
594 fCapitalizationBrkIter = (other.fCapitalizationBrkIter)->clone();
595 }
596 #endif
597
598 if (fSharedNumberFormatters != nullptr) {
599 freeSharedNumberFormatters(fSharedNumberFormatters);
600 fSharedNumberFormatters = nullptr;
601 }
602 if (other.fSharedNumberFormatters != nullptr) {
603 fSharedNumberFormatters = allocSharedNumberFormatters();
604 if (fSharedNumberFormatters) {
605 for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) {
606 SharedObject::copyPtr(
607 other.fSharedNumberFormatters[i],
608 fSharedNumberFormatters[i]);
609 }
610 }
611 }
612
613 UErrorCode localStatus = U_ZERO_ERROR;
614 // SimpleNumberFormatter does not have a copy constructor. Furthermore,
615 // it references data from an internal field, fNumberFormatter,
616 // so we must rematerialize that reference after copying over the number formatter.
617 initSimpleNumberFormatter(localStatus);
618 return *this;
619 }
620
621 //----------------------------------------------------------------------
622
623 SimpleDateFormat*
clone() const624 SimpleDateFormat::clone() const
625 {
626 return new SimpleDateFormat(*this);
627 }
628
629 //----------------------------------------------------------------------
630
631 bool
operator ==(const Format & other) const632 SimpleDateFormat::operator==(const Format& other) const
633 {
634 if (DateFormat::operator==(other)) {
635 // The DateFormat::operator== check for fCapitalizationContext equality above
636 // is sufficient to check equality of all derived context-related data.
637 // DateFormat::operator== guarantees following cast is safe
638 SimpleDateFormat* that = (SimpleDateFormat*)&other;
639 return (fPattern == that->fPattern &&
640 fSymbols != nullptr && // Check for pathological object
641 that->fSymbols != nullptr && // Check for pathological object
642 *fSymbols == *that->fSymbols &&
643 fHaveDefaultCentury == that->fHaveDefaultCentury &&
644 fDefaultCenturyStart == that->fDefaultCenturyStart);
645 }
646 return false;
647 }
648
649 //----------------------------------------------------------------------
650 static const char16_t* timeSkeletons[4] = {
651 u"jmmsszzzz", // kFull
652 u"jmmssz", // kLong
653 u"jmmss", // kMedium
654 u"jmm", // kShort
655 };
656
construct(EStyle timeStyle,EStyle dateStyle,const Locale & locale,UErrorCode & status)657 void SimpleDateFormat::construct(EStyle timeStyle,
658 EStyle dateStyle,
659 const Locale& locale,
660 UErrorCode& status)
661 {
662 // called by several constructors to load pattern data from the resources
663 if (U_FAILURE(status)) return;
664
665 // We will need the calendar to know what type of symbols to load.
666 initializeCalendar(nullptr, locale, status);
667 if (U_FAILURE(status)) return;
668
669 // Load date time patterns directly from resources.
670 const char* cType = fCalendar ? fCalendar->getType() : nullptr;
671 LocalUResourceBundlePointer bundle(ures_open(nullptr, locale.getBaseName(), &status));
672 if (U_FAILURE(status)) return;
673
674 UBool cTypeIsGregorian = true;
675 LocalUResourceBundlePointer dateTimePatterns;
676 if (cType != nullptr && uprv_strcmp(cType, "gregorian") != 0) {
677 CharString resourcePath("calendar/", status);
678 resourcePath.append(cType, status).append("/DateTimePatterns", status);
679 dateTimePatterns.adoptInstead(
680 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
681 (UResourceBundle*)nullptr, &status));
682 cTypeIsGregorian = false;
683 }
684
685 // Check for "gregorian" fallback.
686 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
687 status = U_ZERO_ERROR;
688 dateTimePatterns.adoptInstead(
689 ures_getByKeyWithFallback(bundle.getAlias(),
690 "calendar/gregorian/DateTimePatterns",
691 (UResourceBundle*)nullptr, &status));
692 }
693 if (U_FAILURE(status)) return;
694
695 LocalUResourceBundlePointer currentBundle;
696
697 if (ures_getSize(dateTimePatterns.getAlias()) <= kDateTime)
698 {
699 status = U_INVALID_FORMAT_ERROR;
700 return;
701 }
702
703 setLocaleIDs(ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_VALID_LOCALE, &status),
704 ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_ACTUAL_LOCALE, &status));
705
706 // create a symbols object from the locale
707 fSymbols = DateFormatSymbols::createForLocale(locale, status);
708 if (U_FAILURE(status)) return;
709 /* test for nullptr */
710 if (fSymbols == nullptr) {
711 status = U_MEMORY_ALLOCATION_ERROR;
712 return;
713 }
714
715 const char16_t *resStr,*ovrStr;
716 int32_t resStrLen,ovrStrLen = 0;
717 fDateOverride.setToBogus();
718 fTimeOverride.setToBogus();
719
720 UnicodeString timePattern;
721 if (timeStyle >= kFull && timeStyle <= kShort) {
722 bool hasRgOrHcSubtag = false;
723 // also use DTPG if the locale has the "rg" or "hc" ("hours") subtag-- even if the overriding region
724 // or hour cycle is the same as the one we get by default, we go through the DateTimePatternGenerator
725 UErrorCode dummyErr1 = U_ZERO_ERROR, dummyErr2 = U_ZERO_ERROR;
726 if (locale.getKeywordValue("rg", nullptr, 0, dummyErr1) > 0 || locale.getKeywordValue("hours", nullptr, 0, dummyErr2) > 0) {
727 hasRgOrHcSubtag = true;
728 }
729
730 const char* baseLocID = locale.getBaseName();
731 if (baseLocID != nullptr && uprv_strcmp(baseLocID,"und")!=0) {
732 UErrorCode useStatus = U_ZERO_ERROR;
733 Locale baseLoc(baseLocID);
734 Locale validLoc(getLocale(ULOC_VALID_LOCALE, useStatus));
735 if (hasRgOrHcSubtag || (U_SUCCESS(useStatus) && validLoc!=baseLoc)) {
736 bool useDTPG = hasRgOrHcSubtag;
737 const char* baseReg = baseLoc.getCountry(); // empty string if no region
738 if ((baseReg != nullptr && baseReg[0] != 0 &&
739 uprv_strncmp(baseReg,validLoc.getCountry(),ULOC_COUNTRY_CAPACITY)!=0)
740 || uprv_strncmp(baseLoc.getLanguage(),validLoc.getLanguage(),ULOC_LANG_CAPACITY)!=0) {
741 // use DTPG if
742 // * baseLoc has a region and validLoc does not have the same one (or has none), OR
743 // * validLoc has a different language code than baseLoc
744 // * the original locale has the rg or hc subtag
745 useDTPG = true;
746 }
747 if (useDTPG) {
748 // The standard time formats may have the wrong time cycle, because:
749 // the valid locale differs in important ways (region, language) from
750 // the base locale.
751 // We could *also* check whether they do actually have a mismatch with
752 // the time cycle preferences for the region, but that is a lot more
753 // work for little or no additional benefit, since just going ahead
754 // and always synthesizing the time format as per the following should
755 // create a locale-appropriate pattern with cycle that matches the
756 // region preferences anyway.
757 LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstanceNoStdPat(locale, useStatus));
758 if (U_SUCCESS(useStatus)) {
759 UnicodeString timeSkeleton(true, timeSkeletons[timeStyle], -1);
760 timePattern = dtpg->getBestPattern(timeSkeleton, useStatus);
761 }
762 }
763 }
764 }
765 }
766
767 // if the pattern should include both date and time information, use the date/time
768 // pattern string as a guide to tell use how to glue together the appropriate date
769 // and time pattern strings.
770 if ((timeStyle != kNone) && (dateStyle != kNone))
771 {
772 UnicodeString tempus1(timePattern);
773 if (tempus1.length() == 0) {
774 currentBundle.adoptInstead(
775 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
776 if (U_FAILURE(status)) {
777 status = U_INVALID_FORMAT_ERROR;
778 return;
779 }
780 switch (ures_getType(currentBundle.getAlias())) {
781 case URES_STRING: {
782 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
783 break;
784 }
785 case URES_ARRAY: {
786 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
787 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
788 fTimeOverride.setTo(true, ovrStr, ovrStrLen);
789 break;
790 }
791 default: {
792 status = U_INVALID_FORMAT_ERROR;
793 return;
794 }
795 }
796
797 tempus1.setTo(true, resStr, resStrLen);
798 }
799
800 currentBundle.adoptInstead(
801 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
802 if (U_FAILURE(status)) {
803 status = U_INVALID_FORMAT_ERROR;
804 return;
805 }
806 switch (ures_getType(currentBundle.getAlias())) {
807 case URES_STRING: {
808 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
809 break;
810 }
811 case URES_ARRAY: {
812 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
813 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
814 fDateOverride.setTo(true, ovrStr, ovrStrLen);
815 break;
816 }
817 default: {
818 status = U_INVALID_FORMAT_ERROR;
819 return;
820 }
821 }
822
823 UnicodeString tempus2(true, resStr, resStrLen);
824
825 // Currently, for compatibility with pre-CLDR-42 data, we default to the "atTime"
826 // combining patterns. Depending on guidance in CLDR 42 spec and on DisplayOptions,
827 // we may change this.
828 LocalUResourceBundlePointer dateAtTimePatterns;
829 if (!cTypeIsGregorian) {
830 CharString resourcePath("calendar/", status);
831 resourcePath.append(cType, status).append("/DateTimePatterns%atTime", status);
832 dateAtTimePatterns.adoptInstead(
833 ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(),
834 nullptr, &status));
835 }
836 if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) {
837 status = U_ZERO_ERROR;
838 dateAtTimePatterns.adoptInstead(
839 ures_getByKeyWithFallback(bundle.getAlias(),
840 "calendar/gregorian/DateTimePatterns%atTime",
841 nullptr, &status));
842 }
843 if (U_SUCCESS(status) && ures_getSize(dateAtTimePatterns.getAlias()) >= 4) {
844 resStr = ures_getStringByIndex(dateAtTimePatterns.getAlias(), dateStyle - kDateOffset, &resStrLen, &status);
845 } else {
846 status = U_ZERO_ERROR;
847 int32_t glueIndex = kDateTime;
848 int32_t patternsSize = ures_getSize(dateTimePatterns.getAlias());
849 if (patternsSize >= (kDateTimeOffset + kShort + 1)) {
850 // Get proper date time format
851 glueIndex = (int32_t)(kDateTimeOffset + (dateStyle - kDateOffset));
852 }
853
854 resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), glueIndex, &resStrLen, &status);
855 }
856 SimpleFormatter(UnicodeString(true, resStr, resStrLen), 2, 2, status).
857 format(tempus1, tempus2, fPattern, status);
858 }
859 // if the pattern includes just time data or just date date, load the appropriate
860 // pattern string from the resources
861 // setTo() - see DateFormatSymbols::assignArray comments
862 else if (timeStyle != kNone) {
863 fPattern.setTo(timePattern);
864 if (fPattern.length() == 0) {
865 currentBundle.adoptInstead(
866 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, nullptr, &status));
867 if (U_FAILURE(status)) {
868 status = U_INVALID_FORMAT_ERROR;
869 return;
870 }
871 switch (ures_getType(currentBundle.getAlias())) {
872 case URES_STRING: {
873 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
874 break;
875 }
876 case URES_ARRAY: {
877 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
878 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
879 fDateOverride.setTo(true, ovrStr, ovrStrLen);
880 break;
881 }
882 default: {
883 status = U_INVALID_FORMAT_ERROR;
884 return;
885 }
886 }
887 fPattern.setTo(true, resStr, resStrLen);
888 }
889 }
890 else if (dateStyle != kNone) {
891 currentBundle.adoptInstead(
892 ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, nullptr, &status));
893 if (U_FAILURE(status)) {
894 status = U_INVALID_FORMAT_ERROR;
895 return;
896 }
897 switch (ures_getType(currentBundle.getAlias())) {
898 case URES_STRING: {
899 resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status);
900 break;
901 }
902 case URES_ARRAY: {
903 resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status);
904 ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status);
905 fDateOverride.setTo(true, ovrStr, ovrStrLen);
906 break;
907 }
908 default: {
909 status = U_INVALID_FORMAT_ERROR;
910 return;
911 }
912 }
913 fPattern.setTo(true, resStr, resStrLen);
914 }
915
916 // and if it includes _neither_, that's an error
917 else
918 status = U_INVALID_FORMAT_ERROR;
919
920 // finally, finish initializing by creating a Calendar and a NumberFormat
921 initialize(locale, status);
922 }
923
924 //----------------------------------------------------------------------
925
926 Calendar*
initializeCalendar(TimeZone * adoptZone,const Locale & locale,UErrorCode & status)927 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
928 {
929 if(!U_FAILURE(status)) {
930 fCalendar = Calendar::createInstance(
931 adoptZone ? adoptZone : TimeZone::forLocaleOrDefault(locale), locale, status);
932 }
933 return fCalendar;
934 }
935
936 void
initialize(const Locale & locale,UErrorCode & status)937 SimpleDateFormat::initialize(const Locale& locale,
938 UErrorCode& status)
939 {
940 if (U_FAILURE(status)) return;
941
942 parsePattern(); // Need this before initNumberFormatters(), to set fHasHanYearChar
943
944 // Simple-minded hack to force Gannen year numbering for ja@calendar=japanese
945 // if format is non-numeric (includes 年) and fDateOverride is not already specified.
946 // Now this does get updated if applyPattern subsequently changes the pattern type.
947 if (fDateOverride.isBogus() && fHasHanYearChar &&
948 fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
949 uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
950 fDateOverride.setTo(u"y=jpanyear", -1);
951 }
952
953 // We don't need to check that the row count is >= 1, since all 2d arrays have at
954 // least one row
955 fNumberFormat = NumberFormat::createInstance(locale, status);
956 if (fNumberFormat != nullptr && U_SUCCESS(status))
957 {
958 fixNumberFormatForDates(*fNumberFormat);
959 //fNumberFormat->setLenient(true); // Java uses a custom DateNumberFormat to format/parse
960
961 initNumberFormatters(locale, status);
962 initSimpleNumberFormatter(status);
963
964 }
965 else if (U_SUCCESS(status))
966 {
967 status = U_MISSING_RESOURCE_ERROR;
968 }
969 }
970
971 /* Initialize the fields we use to disambiguate ambiguous years. Separate
972 * so we can call it from readObject().
973 */
initializeDefaultCentury()974 void SimpleDateFormat::initializeDefaultCentury()
975 {
976 if(fCalendar) {
977 fHaveDefaultCentury = fCalendar->haveDefaultCentury();
978 if(fHaveDefaultCentury) {
979 fDefaultCenturyStart = fCalendar->defaultCenturyStart();
980 fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
981 } else {
982 fDefaultCenturyStart = DBL_MIN;
983 fDefaultCenturyStartYear = -1;
984 }
985 }
986 }
987
988 /*
989 * Initialize the boolean attributes. Separate so we can call it from all constructors.
990 */
initializeBooleanAttributes()991 void SimpleDateFormat::initializeBooleanAttributes()
992 {
993 UErrorCode status = U_ZERO_ERROR;
994
995 setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status);
996 setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status);
997 setBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, true, status);
998 setBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, true, status);
999 }
1000
1001 /* Define one-century window into which to disambiguate dates using
1002 * two-digit years. Make public in JDK 1.2.
1003 */
parseAmbiguousDatesAsAfter(UDate startDate,UErrorCode & status)1004 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
1005 {
1006 if(U_FAILURE(status)) {
1007 return;
1008 }
1009 if(!fCalendar) {
1010 status = U_ILLEGAL_ARGUMENT_ERROR;
1011 return;
1012 }
1013
1014 fCalendar->setTime(startDate, status);
1015 if(U_SUCCESS(status)) {
1016 fHaveDefaultCentury = true;
1017 fDefaultCenturyStart = startDate;
1018 fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
1019 }
1020 }
1021
1022 //----------------------------------------------------------------------
1023
1024 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPosition & pos) const1025 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
1026 {
1027 UErrorCode status = U_ZERO_ERROR;
1028 FieldPositionOnlyHandler handler(pos);
1029 return _format(cal, appendTo, handler, status);
1030 }
1031
1032 //----------------------------------------------------------------------
1033
1034 UnicodeString&
format(Calendar & cal,UnicodeString & appendTo,FieldPositionIterator * posIter,UErrorCode & status) const1035 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo,
1036 FieldPositionIterator* posIter, UErrorCode& status) const
1037 {
1038 FieldPositionIteratorHandler handler(posIter, status);
1039 return _format(cal, appendTo, handler, status);
1040 }
1041
1042 //----------------------------------------------------------------------
1043
1044 UnicodeString&
_format(Calendar & cal,UnicodeString & appendTo,FieldPositionHandler & handler,UErrorCode & status) const1045 SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo,
1046 FieldPositionHandler& handler, UErrorCode& status) const
1047 {
1048 if ( U_FAILURE(status) ) {
1049 return appendTo;
1050 }
1051 Calendar* workCal = &cal;
1052 Calendar* calClone = nullptr;
1053 if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
1054 // Different calendar type
1055 // We use the time and time zone from the input calendar, but
1056 // do not use the input calendar for field calculation.
1057 calClone = fCalendar->clone();
1058 if (calClone != nullptr) {
1059 UDate t = cal.getTime(status);
1060 calClone->setTime(t, status);
1061 calClone->setTimeZone(cal.getTimeZone());
1062 workCal = calClone;
1063 } else {
1064 status = U_MEMORY_ALLOCATION_ERROR;
1065 return appendTo;
1066 }
1067 }
1068
1069 UBool inQuote = false;
1070 char16_t prevCh = 0;
1071 int32_t count = 0;
1072 int32_t fieldNum = 0;
1073 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1074
1075 // loop through the pattern string character by character
1076 int32_t patternLength = fPattern.length();
1077 for (int32_t i = 0; i < patternLength && U_SUCCESS(status); ++i) {
1078 char16_t ch = fPattern[i];
1079
1080 // Use subFormat() to format a repeated pattern character
1081 // when a different pattern or non-pattern character is seen
1082 if (ch != prevCh && count > 0) {
1083 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1084 prevCh, handler, *workCal, status);
1085 count = 0;
1086 }
1087 if (ch == QUOTE) {
1088 // Consecutive single quotes are a single quote literal,
1089 // either outside of quotes or between quotes
1090 if ((i+1) < patternLength && fPattern[i+1] == QUOTE) {
1091 appendTo += (char16_t)QUOTE;
1092 ++i;
1093 } else {
1094 inQuote = ! inQuote;
1095 }
1096 }
1097 else if (!inQuote && isSyntaxChar(ch)) {
1098 // ch is a date-time pattern character to be interpreted
1099 // by subFormat(); count the number of times it is repeated
1100 prevCh = ch;
1101 ++count;
1102 }
1103 else {
1104 // Append quoted characters and unquoted non-pattern characters
1105 appendTo += ch;
1106 }
1107 }
1108
1109 // Format the last item in the pattern, if any
1110 if (count > 0) {
1111 subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++,
1112 prevCh, handler, *workCal, status);
1113 }
1114
1115 delete calClone;
1116
1117 return appendTo;
1118 }
1119
1120 //----------------------------------------------------------------------
1121
1122 /* Map calendar field into calendar field level.
1123 * the larger the level, the smaller the field unit.
1124 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
1125 * UCAL_MONTH level is 20.
1126 * NOTE: if new fields adds in, the table needs to update.
1127 */
1128 const int32_t
1129 SimpleDateFormat::fgCalendarFieldToLevel[] =
1130 {
1131 /*GyM*/ 0, 10, 20,
1132 /*wW*/ 20, 30,
1133 /*dDEF*/ 30, 20, 30, 30,
1134 /*ahHm*/ 40, 50, 50, 60,
1135 /*sS*/ 70, 80,
1136 /*z?Y*/ 0, 0, 10,
1137 /*eug*/ 30, 10, 0,
1138 /*A?.*/ 40, 0, 0
1139 };
1140
getLevelFromChar(char16_t ch)1141 int32_t SimpleDateFormat::getLevelFromChar(char16_t ch) {
1142 // Map date field LETTER into calendar field level.
1143 // the larger the level, the smaller the field unit.
1144 // NOTE: if new fields adds in, the table needs to update.
1145 static const int32_t mapCharToLevel[] = {
1146 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1147 //
1148 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1149 // ! " # $ % & ' ( ) * + , - . /
1150 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1151 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1152 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1153 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1,
1154 #else
1155 // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
1156 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1157 #endif
1158 // @ A B C D E F G H I J K L M N O
1159 -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, 0,
1160 // P Q R S T U V W X Y Z [ \ ] ^ _
1161 -1, 20, -1, 80, -1, 10, 0, 30, 0, 10, 0, -1, -1, -1, -1, -1,
1162 // ` a b c d e f g h i j k l m n o
1163 -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, 0, 60, -1, -1,
1164 // p q r s t u v w x y z { | } ~
1165 -1, 20, 10, 70, -1, 10, 0, 20, 0, 10, 0, -1, -1, -1, -1, -1
1166 };
1167
1168 return ch < UPRV_LENGTHOF(mapCharToLevel) ? mapCharToLevel[ch] : -1;
1169 }
1170
isSyntaxChar(char16_t ch)1171 UBool SimpleDateFormat::isSyntaxChar(char16_t ch) {
1172 static const UBool mapCharToIsSyntax[] = {
1173 //
1174 false, false, false, false, false, false, false, false,
1175 //
1176 false, false, false, false, false, false, false, false,
1177 //
1178 false, false, false, false, false, false, false, false,
1179 //
1180 false, false, false, false, false, false, false, false,
1181 // ! " # $ % & '
1182 false, false, false, false, false, false, false, false,
1183 // ( ) * + , - . /
1184 false, false, false, false, false, false, false, false,
1185 // 0 1 2 3 4 5 6 7
1186 false, false, false, false, false, false, false, false,
1187 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1188 // 8 9 : ; < = > ?
1189 false, false, true, false, false, false, false, false,
1190 #else
1191 // 8 9 : ; < = > ?
1192 false, false, false, false, false, false, false, false,
1193 #endif
1194 // @ A B C D E F G
1195 false, true, true, true, true, true, true, true,
1196 // H I J K L M N O
1197 true, true, true, true, true, true, true, true,
1198 // P Q R S T U V W
1199 true, true, true, true, true, true, true, true,
1200 // X Y Z [ \ ] ^ _
1201 true, true, true, false, false, false, false, false,
1202 // ` a b c d e f g
1203 false, true, true, true, true, true, true, true,
1204 // h i j k l m n o
1205 true, true, true, true, true, true, true, true,
1206 // p q r s t u v w
1207 true, true, true, true, true, true, true, true,
1208 // x y z { | } ~
1209 true, true, true, false, false, false, false, false
1210 };
1211
1212 return ch < UPRV_LENGTHOF(mapCharToIsSyntax) ? mapCharToIsSyntax[ch] : false;
1213 }
1214
1215 // Map index into pattern character string to Calendar field number.
1216 const UCalendarDateFields
1217 SimpleDateFormat::fgPatternIndexToCalendarField[] =
1218 {
1219 /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
1220 /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
1221 /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
1222 /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
1223 /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
1224 /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
1225 /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
1226 /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
1227 /*v*/ UCAL_ZONE_OFFSET,
1228 /*c*/ UCAL_DOW_LOCAL,
1229 /*L*/ UCAL_MONTH,
1230 /*Q*/ UCAL_MONTH,
1231 /*q*/ UCAL_MONTH,
1232 /*V*/ UCAL_ZONE_OFFSET,
1233 /*U*/ UCAL_YEAR,
1234 /*O*/ UCAL_ZONE_OFFSET,
1235 /*Xx*/ UCAL_ZONE_OFFSET, UCAL_ZONE_OFFSET,
1236 /*r*/ UCAL_EXTENDED_YEAR,
1237 /*bB*/ UCAL_FIELD_COUNT, UCAL_FIELD_COUNT, // no mappings to calendar fields
1238 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1239 /*:*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
1240 #else
1241 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */
1242 #endif
1243 };
1244
1245 // Map index into pattern character string to DateFormat field number
1246 const UDateFormatField
1247 SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
1248 /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
1249 /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
1250 /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
1251 /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
1252 /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
1253 /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
1254 /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
1255 /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
1256 /*v*/ UDAT_TIMEZONE_GENERIC_FIELD,
1257 /*c*/ UDAT_STANDALONE_DAY_FIELD,
1258 /*L*/ UDAT_STANDALONE_MONTH_FIELD,
1259 /*Q*/ UDAT_QUARTER_FIELD,
1260 /*q*/ UDAT_STANDALONE_QUARTER_FIELD,
1261 /*V*/ UDAT_TIMEZONE_SPECIAL_FIELD,
1262 /*U*/ UDAT_YEAR_NAME_FIELD,
1263 /*O*/ UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD,
1264 /*Xx*/ UDAT_TIMEZONE_ISO_FIELD, UDAT_TIMEZONE_ISO_LOCAL_FIELD,
1265 /*r*/ UDAT_RELATED_YEAR_FIELD,
1266 /*bB*/ UDAT_AM_PM_MIDNIGHT_NOON_FIELD, UDAT_FLEXIBLE_DAY_PERIOD_FIELD,
1267 #if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR
1268 /*:*/ UDAT_TIME_SEPARATOR_FIELD,
1269 #else
1270 /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UDAT_TIME_SEPARATOR_FIELD,
1271 #endif
1272 };
1273
1274 //----------------------------------------------------------------------
1275
1276 /**
1277 * Append symbols[value] to dst. Make sure the array index is not out
1278 * of bounds.
1279 */
1280 static inline void
_appendSymbol(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount)1281 _appendSymbol(UnicodeString& dst,
1282 int32_t value,
1283 const UnicodeString* symbols,
1284 int32_t symbolsCount) {
1285 U_ASSERT(0 <= value && value < symbolsCount);
1286 if (0 <= value && value < symbolsCount) {
1287 dst += symbols[value];
1288 }
1289 }
1290
1291 static inline void
_appendSymbolWithMonthPattern(UnicodeString & dst,int32_t value,const UnicodeString * symbols,int32_t symbolsCount,const UnicodeString * monthPattern,UErrorCode & status)1292 _appendSymbolWithMonthPattern(UnicodeString& dst, int32_t value, const UnicodeString* symbols, int32_t symbolsCount,
1293 const UnicodeString* monthPattern, UErrorCode& status) {
1294 U_ASSERT(0 <= value && value < symbolsCount);
1295 if (0 <= value && value < symbolsCount) {
1296 if (monthPattern == nullptr) {
1297 dst += symbols[value];
1298 } else {
1299 SimpleFormatter(*monthPattern, 1, 1, status).format(symbols[value], dst, status);
1300 }
1301 }
1302 }
1303
1304 //----------------------------------------------------------------------
1305
1306 void
initSimpleNumberFormatter(UErrorCode & status)1307 SimpleDateFormat::initSimpleNumberFormatter(UErrorCode &status) {
1308 if (U_FAILURE(status)) {
1309 return;
1310 }
1311 const auto* df = dynamic_cast<const DecimalFormat*>(fNumberFormat);
1312 if (df == nullptr) {
1313 return;
1314 }
1315 const DecimalFormatSymbols* syms = df->getDecimalFormatSymbols();
1316 if (syms == nullptr) {
1317 return;
1318 }
1319 fSimpleNumberFormatter = new number::SimpleNumberFormatter(
1320 number::SimpleNumberFormatter::forLocaleAndSymbolsAndGroupingStrategy(
1321 fLocale, *syms, UNUM_GROUPING_OFF, status
1322 )
1323 );
1324 if (fSimpleNumberFormatter == nullptr) {
1325 status = U_MEMORY_ALLOCATION_ERROR;
1326 }
1327 }
1328
1329 void
initNumberFormatters(const Locale & locale,UErrorCode & status)1330 SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) {
1331 if (U_FAILURE(status)) {
1332 return;
1333 }
1334 if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) {
1335 return;
1336 }
1337 umtx_lock(&LOCK);
1338 if (fSharedNumberFormatters == nullptr) {
1339 fSharedNumberFormatters = allocSharedNumberFormatters();
1340 if (fSharedNumberFormatters == nullptr) {
1341 status = U_MEMORY_ALLOCATION_ERROR;
1342 }
1343 }
1344 umtx_unlock(&LOCK);
1345
1346 if (U_FAILURE(status)) {
1347 return;
1348 }
1349
1350 processOverrideString(locale,fDateOverride,kOvrStrDate,status);
1351 processOverrideString(locale,fTimeOverride,kOvrStrTime,status);
1352 }
1353
1354 void
processOverrideString(const Locale & locale,const UnicodeString & str,int8_t type,UErrorCode & status)1355 SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) {
1356 if (str.isBogus() || U_FAILURE(status)) {
1357 return;
1358 }
1359
1360 int32_t start = 0;
1361 int32_t len;
1362 UnicodeString nsName;
1363 UnicodeString ovrField;
1364 UBool moreToProcess = true;
1365 NSOverride *overrideList = nullptr;
1366
1367 while (moreToProcess) {
1368 int32_t delimiterPosition = str.indexOf((char16_t)ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE,start);
1369 if (delimiterPosition == -1) {
1370 moreToProcess = false;
1371 len = str.length() - start;
1372 } else {
1373 len = delimiterPosition - start;
1374 }
1375 UnicodeString currentString(str,start,len);
1376 int32_t equalSignPosition = currentString.indexOf((char16_t)ULOC_KEYWORD_ASSIGN_UNICODE,0);
1377 if (equalSignPosition == -1) { // Simple override string such as "hebrew"
1378 nsName.setTo(currentString);
1379 ovrField.setToBogus();
1380 } else { // Field specific override string such as "y=hebrew"
1381 nsName.setTo(currentString,equalSignPosition+1);
1382 ovrField.setTo(currentString,0,1); // We just need the first character.
1383 }
1384
1385 int32_t nsNameHash = nsName.hashCode();
1386 // See if the numbering system is in the override list, if not, then add it.
1387 NSOverride *curr = overrideList;
1388 const SharedNumberFormat *snf = nullptr;
1389 UBool found = false;
1390 while ( curr && !found ) {
1391 if ( curr->hash == nsNameHash ) {
1392 snf = curr->snf;
1393 found = true;
1394 }
1395 curr = curr->next;
1396 }
1397
1398 if (!found) {
1399 LocalPointer<NSOverride> cur(new NSOverride);
1400 if (!cur.isNull()) {
1401 char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1402 uprv_strcpy(kw,"numbers=");
1403 nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV);
1404
1405 Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw);
1406 cur->hash = nsNameHash;
1407 cur->next = overrideList;
1408 SharedObject::copyPtr(
1409 createSharedNumberFormat(ovrLoc, status), cur->snf);
1410 if (U_FAILURE(status)) {
1411 if (overrideList) {
1412 overrideList->free();
1413 }
1414 return;
1415 }
1416 snf = cur->snf;
1417 overrideList = cur.orphan();
1418 } else {
1419 status = U_MEMORY_ALLOCATION_ERROR;
1420 if (overrideList) {
1421 overrideList->free();
1422 }
1423 return;
1424 }
1425 }
1426
1427 // Now that we have an appropriate number formatter, fill in the appropriate spaces in the
1428 // number formatters table.
1429 if (ovrField.isBogus()) {
1430 switch (type) {
1431 case kOvrStrDate:
1432 case kOvrStrBoth: {
1433 for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) {
1434 SharedObject::copyPtr(snf, fSharedNumberFormatters[kDateFields[i]]);
1435 }
1436 if (type==kOvrStrDate) {
1437 break;
1438 }
1439 U_FALLTHROUGH;
1440 }
1441 case kOvrStrTime : {
1442 for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) {
1443 SharedObject::copyPtr(snf, fSharedNumberFormatters[kTimeFields[i]]);
1444 }
1445 break;
1446 }
1447 }
1448 } else {
1449 // if the pattern character is unrecognized, signal an error and bail out
1450 UDateFormatField patternCharIndex =
1451 DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0));
1452 if (patternCharIndex == UDAT_FIELD_COUNT) {
1453 status = U_INVALID_FORMAT_ERROR;
1454 if (overrideList) {
1455 overrideList->free();
1456 }
1457 return;
1458 }
1459 SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
1460 }
1461
1462 start = delimiterPosition + 1;
1463 }
1464 if (overrideList) {
1465 overrideList->free();
1466 }
1467 }
1468
1469 //---------------------------------------------------------------------
1470 void
subFormat(UnicodeString & appendTo,char16_t ch,int32_t count,UDisplayContext capitalizationContext,int32_t fieldNum,char16_t fieldToOutput,FieldPositionHandler & handler,Calendar & cal,UErrorCode & status) const1471 SimpleDateFormat::subFormat(UnicodeString &appendTo,
1472 char16_t ch,
1473 int32_t count,
1474 UDisplayContext capitalizationContext,
1475 int32_t fieldNum,
1476 char16_t fieldToOutput,
1477 FieldPositionHandler& handler,
1478 Calendar& cal,
1479 UErrorCode& status) const
1480 {
1481 static const int32_t maxIntCount = 10;
1482 static const UnicodeString hebr(u"hebr");
1483
1484 if (U_FAILURE(status)) {
1485 return;
1486 }
1487
1488 // this function gets called by format() to produce the appropriate substitution
1489 // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1490
1491 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
1492 int32_t beginOffset = appendTo.length();
1493 DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther;
1494
1495 // if the pattern character is unrecognized, signal an error and dump out
1496 if (patternCharIndex == UDAT_FIELD_COUNT)
1497 {
1498 if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
1499 status = U_INVALID_FORMAT_ERROR;
1500 }
1501 return;
1502 }
1503
1504 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1505 int32_t value = 0;
1506 // Don't get value unless it is useful
1507 if (field < UCAL_FIELD_COUNT) {
1508 value = (patternCharIndex != UDAT_RELATED_YEAR_FIELD)? cal.get(field, status): cal.getRelatedYear(status);
1509 if (U_FAILURE(status)) {
1510 return;
1511 }
1512 }
1513
1514 const NumberFormat *currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
1515 if (currentNumberFormat == nullptr) {
1516 status = U_INTERNAL_PROGRAM_ERROR;
1517 return;
1518 }
1519
1520 switch (patternCharIndex) {
1521
1522 // for any "G" symbol, write out the appropriate era string
1523 // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
1524 case UDAT_ERA_FIELD:
1525 {
1526 const auto* calType = cal.getType();
1527 if (uprv_strcmp(calType,"chinese") == 0 || uprv_strcmp(calType,"dangi") == 0) {
1528 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J
1529 } else {
1530 if (count == 5) {
1531 _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1532 capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow;
1533 } else if (count == 4) {
1534 _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1535 capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide;
1536 } else {
1537 _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1538 capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev;
1539 }
1540 }
1541 }
1542 break;
1543
1544 case UDAT_YEAR_NAME_FIELD:
1545 if (fSymbols->fShortYearNames != nullptr && value <= fSymbols->fShortYearNamesCount) {
1546 // the Calendar YEAR field runs 1 through 60 for cyclic years
1547 _appendSymbol(appendTo, value - 1, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount);
1548 break;
1549 }
1550 // else fall through to numeric year handling, do not break here
1551 U_FALLTHROUGH;
1552
1553 // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1554 // NEW: UTS#35:
1555 //Year y yy yyy yyyy yyyyy
1556 //AD 1 1 01 001 0001 00001
1557 //AD 12 12 12 012 0012 00012
1558 //AD 123 123 23 123 0123 00123
1559 //AD 1234 1234 34 1234 1234 01234
1560 //AD 12345 12345 45 12345 12345 12345
1561 case UDAT_YEAR_FIELD:
1562 case UDAT_YEAR_WOY_FIELD:
1563 if (fDateOverride.compare(hebr)==0 && value>HEBREW_CAL_CUR_MILLENIUM_START_YEAR && value<HEBREW_CAL_CUR_MILLENIUM_END_YEAR) {
1564 value-=HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
1565 }
1566 if(count == 2)
1567 zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2);
1568 else
1569 zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount);
1570 break;
1571
1572 // for "MMMM"/"LLLL", write out the whole month name, for "MMM"/"LLL", write out the month
1573 // abbreviation, for "M"/"L" or "MM"/"LL", write out the month as a number with the
1574 // appropriate number of digits
1575 // for "MMMMM"/"LLLLL", use the narrow form
1576 case UDAT_MONTH_FIELD:
1577 case UDAT_STANDALONE_MONTH_FIELD:
1578 if (uprv_strcmp(cal.getType(),"hebrew") == 0) {
1579 HebrewCalendar *hc = (HebrewCalendar*)&cal;
1580 if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 )
1581 value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar.
1582 if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6 && count < 3 )
1583 value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7.
1584 }
1585 {
1586 int32_t isLeapMonth = (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount)?
1587 cal.get(UCAL_IS_LEAP_MONTH, status): 0;
1588 // should consolidate the next section by using arrays of pointers & counts for the right symbols...
1589 if (count == 5) {
1590 if (patternCharIndex == UDAT_MONTH_FIELD) {
1591 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fNarrowMonths, fSymbols->fNarrowMonthsCount,
1592 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatNarrow]): nullptr, status);
1593 } else {
1594 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneNarrowMonths, fSymbols->fStandaloneNarrowMonthsCount,
1595 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneNarrow]): nullptr, status);
1596 }
1597 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthNarrow;
1598 } else if (count == 4) {
1599 if (patternCharIndex == UDAT_MONTH_FIELD) {
1600 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fMonths, fSymbols->fMonthsCount,
1601 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]): nullptr, status);
1602 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1603 } else {
1604 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount,
1605 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]): nullptr, status);
1606 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1607 }
1608 } else if (count == 3) {
1609 if (patternCharIndex == UDAT_MONTH_FIELD) {
1610 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fShortMonths, fSymbols->fShortMonthsCount,
1611 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]): nullptr, status);
1612 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat;
1613 } else {
1614 _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount,
1615 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]): nullptr, status);
1616 capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone;
1617 }
1618 } else {
1619 UnicodeString monthNumber;
1620 zeroPaddingNumber(currentNumberFormat,monthNumber, value + 1, count, maxIntCount);
1621 _appendSymbolWithMonthPattern(appendTo, 0, &monthNumber, 1,
1622 (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric]): nullptr, status);
1623 }
1624 }
1625 break;
1626
1627 // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1628 case UDAT_HOUR_OF_DAY1_FIELD:
1629 if (value == 0)
1630 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1631 else
1632 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1633 break;
1634
1635 case UDAT_FRACTIONAL_SECOND_FIELD:
1636 // Fractional seconds left-justify
1637 {
1638 int32_t minDigits = (count > 3) ? 3 : count;
1639 if (count == 1) {
1640 value /= 100;
1641 } else if (count == 2) {
1642 value /= 10;
1643 }
1644 zeroPaddingNumber(currentNumberFormat, appendTo, value, minDigits, maxIntCount);
1645 if (count > 3) {
1646 zeroPaddingNumber(currentNumberFormat, appendTo, 0, count - 3, maxIntCount);
1647 }
1648 }
1649 break;
1650
1651 // for "ee" or "e", use local numeric day-of-the-week
1652 // for "EEEEEE" or "eeeeee", write out the short day-of-the-week name
1653 // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1654 // for "EEEE" or "eeee", write out the wide day-of-the-week name
1655 // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1656 case UDAT_DOW_LOCAL_FIELD:
1657 if ( count < 3 ) {
1658 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1659 break;
1660 }
1661 // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1662 // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1663 value = cal.get(UCAL_DAY_OF_WEEK, status);
1664 if (U_FAILURE(status)) {
1665 return;
1666 }
1667 // fall through, do not break here
1668 U_FALLTHROUGH;
1669 case UDAT_DAY_OF_WEEK_FIELD:
1670 if (count == 5) {
1671 _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1672 fSymbols->fNarrowWeekdaysCount);
1673 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1674 } else if (count == 4) {
1675 _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1676 fSymbols->fWeekdaysCount);
1677 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1678 } else if (count == 6) {
1679 _appendSymbol(appendTo, value, fSymbols->fShorterWeekdays,
1680 fSymbols->fShorterWeekdaysCount);
1681 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1682 } else {
1683 _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1684 fSymbols->fShortWeekdaysCount);
1685 capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat;
1686 }
1687 break;
1688
1689 // for "ccc", write out the abbreviated day-of-the-week name
1690 // for "cccc", write out the wide day-of-the-week name
1691 // for "ccccc", use the narrow day-of-the-week name
    // for "cccccc", use the short day-of-the-week name
1693 case UDAT_STANDALONE_DAY_FIELD:
1694 if ( count < 3 ) {
1695 zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount);
1696 break;
1697 }
1698 // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1699 // we want standard day-of-week, so first fix value.
1700 value = cal.get(UCAL_DAY_OF_WEEK, status);
1701 if (U_FAILURE(status)) {
1702 return;
1703 }
1704 if (count == 5) {
1705 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1706 fSymbols->fStandaloneNarrowWeekdaysCount);
1707 capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow;
1708 } else if (count == 4) {
1709 _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1710 fSymbols->fStandaloneWeekdaysCount);
1711 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1712 } else if (count == 6) {
1713 _appendSymbol(appendTo, value, fSymbols->fStandaloneShorterWeekdays,
1714 fSymbols->fStandaloneShorterWeekdaysCount);
1715 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1716 } else { // count == 3
1717 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1718 fSymbols->fStandaloneShortWeekdaysCount);
1719 capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone;
1720 }
1721 break;
1722
1723 // for "a" symbol, write out the whole AM/PM string
1724 case UDAT_AM_PM_FIELD:
1725 if (count < 5) {
1726 _appendSymbol(appendTo, value, fSymbols->fAmPms,
1727 fSymbols->fAmPmsCount);
1728 } else {
1729 _appendSymbol(appendTo, value, fSymbols->fNarrowAmPms,
1730 fSymbols->fNarrowAmPmsCount);
1731 }
1732 break;
1733
1734 // if we see pattern character for UDAT_TIME_SEPARATOR_FIELD (none currently defined),
1735 // write out the time separator string. Leave support in for future definition.
1736 case UDAT_TIME_SEPARATOR_FIELD:
1737 {
1738 UnicodeString separator;
1739 appendTo += fSymbols->getTimeSeparatorString(separator);
1740 }
1741 break;
1742
1743 // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1744 // as "12"
1745 case UDAT_HOUR1_FIELD:
1746 if (value == 0)
1747 zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1748 else
1749 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1750 break;
1751
1752 case UDAT_TIMEZONE_FIELD: // 'z'
1753 case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
1754 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
1755 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
1756 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
1757 case UDAT_TIMEZONE_ISO_FIELD: // 'X'
1758 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
1759 {
1760 char16_t zsbuf[ZONE_NAME_U16_MAX];
1761 UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf));
1762 const TimeZone& tz = cal.getTimeZone();
1763 UDate date = cal.getTime(status);
1764 const TimeZoneFormat *tzfmt = tzFormat(status);
1765 if (U_SUCCESS(status)) {
1766 switch (patternCharIndex) {
1767 case UDAT_TIMEZONE_FIELD:
1768 if (count < 4) {
1769 // "z", "zz", "zzz"
1770 tzfmt->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString);
1771 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1772 } else {
1773 // "zzzz" or longer
1774 tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString);
1775 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1776 }
1777 break;
1778 case UDAT_TIMEZONE_RFC_FIELD:
1779 if (count < 4) {
1780 // "Z"
1781 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1782 } else if (count == 5) {
1783 // "ZZZZZ"
1784 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1785 } else {
1786 // "ZZ", "ZZZ", "ZZZZ"
1787 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1788 }
1789 break;
1790 case UDAT_TIMEZONE_GENERIC_FIELD:
1791 if (count == 1) {
1792 // "v"
1793 tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString);
1794 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort;
1795 } else if (count == 4) {
1796 // "vvvv"
1797 tzfmt->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString);
1798 capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong;
1799 }
1800 break;
1801 case UDAT_TIMEZONE_SPECIAL_FIELD:
1802 if (count == 1) {
1803 // "V"
1804 tzfmt->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString);
1805 } else if (count == 2) {
1806 // "VV"
1807 tzfmt->format(UTZFMT_STYLE_ZONE_ID, tz, date, zoneString);
1808 } else if (count == 3) {
1809 // "VVV"
1810 tzfmt->format(UTZFMT_STYLE_EXEMPLAR_LOCATION, tz, date, zoneString);
1811 } else if (count == 4) {
1812 // "VVVV"
1813 tzfmt->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString);
1814 capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong;
1815 }
1816 break;
1817 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD:
1818 if (count == 1) {
1819 // "O"
1820 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString);
1821 } else if (count == 4) {
1822 // "OOOO"
1823 tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString);
1824 }
1825 break;
1826 case UDAT_TIMEZONE_ISO_FIELD:
1827 if (count == 1) {
1828 // "X"
1829 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString);
1830 } else if (count == 2) {
1831 // "XX"
1832 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FIXED, tz, date, zoneString);
1833 } else if (count == 3) {
1834 // "XXX"
1835 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FIXED, tz, date, zoneString);
1836 } else if (count == 4) {
1837 // "XXXX"
1838 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FULL, tz, date, zoneString);
1839 } else if (count == 5) {
1840 // "XXXXX"
1841 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString);
1842 }
1843 break;
1844 case UDAT_TIMEZONE_ISO_LOCAL_FIELD:
1845 if (count == 1) {
1846 // "x"
1847 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString);
1848 } else if (count == 2) {
1849 // "xx"
1850 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, tz, date, zoneString);
1851 } else if (count == 3) {
1852 // "xxx"
1853 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, tz, date, zoneString);
1854 } else if (count == 4) {
1855 // "xxxx"
1856 tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString);
1857 } else if (count == 5) {
1858 // "xxxxx"
1859 tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString);
1860 }
1861 break;
1862 default:
1863 UPRV_UNREACHABLE_EXIT;
1864 }
1865 }
1866 appendTo += zoneString;
1867 }
1868 break;
1869
1870 case UDAT_QUARTER_FIELD:
1871 if (count >= 5)
1872 _appendSymbol(appendTo, value/3, fSymbols->fNarrowQuarters,
1873 fSymbols->fNarrowQuartersCount);
1874 else if (count == 4)
1875 _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1876 fSymbols->fQuartersCount);
1877 else if (count == 3)
1878 _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1879 fSymbols->fShortQuartersCount);
1880 else
1881 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1882 break;
1883
1884 case UDAT_STANDALONE_QUARTER_FIELD:
1885 if (count >= 5)
1886 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneNarrowQuarters,
1887 fSymbols->fStandaloneNarrowQuartersCount);
1888 else if (count == 4)
1889 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1890 fSymbols->fStandaloneQuartersCount);
1891 else if (count == 3)
1892 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1893 fSymbols->fStandaloneShortQuartersCount);
1894 else
1895 zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1896 break;
1897
1898 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
1899 {
1900 const UnicodeString *toAppend = nullptr;
1901 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1902
1903 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1904 // For ICU 57 output of "midnight" is temporarily suppressed.
1905
1906 // For "midnight" and "noon":
1907 // Time, as displayed, must be exactly noon or midnight.
1908 // This means minutes and seconds, if present, must be zero.
1909 if ((/*hour == 0 ||*/ hour == 12) &&
1910 (!fHasMinute || cal.get(UCAL_MINUTE, status) == 0) &&
1911 (!fHasSecond || cal.get(UCAL_SECOND, status) == 0)) {
1912 // Stealing am/pm value to use as our array index.
1913 // It works out: am/midnight are both 0, pm/noon are both 1,
1914 // 12 am is 12 midnight, and 12 pm is 12 noon.
1915 int32_t val = cal.get(UCAL_AM_PM, status);
1916
1917 if (count <= 3) {
1918 toAppend = &fSymbols->fAbbreviatedDayPeriods[val];
1919 } else if (count == 4 || count > 5) {
1920 toAppend = &fSymbols->fWideDayPeriods[val];
1921 } else { // count == 5
1922 toAppend = &fSymbols->fNarrowDayPeriods[val];
1923 }
1924 }
1925
1926 // toAppend is nullptr if time isn't exactly midnight or noon (as displayed).
1927 // toAppend is bogus if time is midnight or noon, but no localized string exists.
1928 // In either case, fall back to am/pm.
1929 if (toAppend == nullptr || toAppend->isBogus()) {
1930 // Reformat with identical arguments except ch, now changed to 'a'.
1931 // We are passing a different fieldToOutput because we want to add
1932 // 'b' to field position. This makes this fallback stable when
1933 // there is a data change on locales.
1934 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'b', handler, cal, status);
1935 return;
1936 } else {
1937 appendTo += *toAppend;
1938 }
1939
1940 break;
1941 }
1942
1943 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
1944 {
1945 // TODO: Maybe fetch the DayperiodRules during initialization (instead of at the first
1946 // loading of an instance) if a relevant pattern character (b or B) is used.
1947 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
1948 if (U_FAILURE(status)) {
1949 // Data doesn't conform to spec, therefore loading failed.
1950 break;
1951 }
1952 if (ruleSet == nullptr) {
1953 // Data doesn't exist for the locale we're looking for.
1954 // Falling back to am/pm.
1955 // We are passing a different fieldToOutput because we want to add
1956 // 'B' to field position. This makes this fallback stable when
1957 // there is a data change on locales.
1958 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
1959 return;
1960 }
1961
1962 // Get current display time.
1963 int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status);
1964 int32_t minute = 0;
1965 if (fHasMinute) {
1966 minute = cal.get(UCAL_MINUTE, status);
1967 }
1968 int32_t second = 0;
1969 if (fHasSecond) {
1970 second = cal.get(UCAL_SECOND, status);
1971 }
1972
1973 // Determine day period.
1974 DayPeriodRules::DayPeriod periodType;
1975 if (hour == 0 && minute == 0 && second == 0 && ruleSet->hasMidnight()) {
1976 periodType = DayPeriodRules::DAYPERIOD_MIDNIGHT;
1977 } else if (hour == 12 && minute == 0 && second == 0 && ruleSet->hasNoon()) {
1978 periodType = DayPeriodRules::DAYPERIOD_NOON;
1979 } else {
1980 periodType = ruleSet->getDayPeriodForHour(hour);
1981 }
1982
1983 // Rule set exists, therefore periodType can't be UNKNOWN.
1984 // Get localized string.
1985 U_ASSERT(periodType != DayPeriodRules::DAYPERIOD_UNKNOWN);
1986 UnicodeString *toAppend = nullptr;
1987 int32_t index;
1988
1989 // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day.
1990 // For ICU 57 output of "midnight" is temporarily suppressed.
1991
1992 if (periodType != DayPeriodRules::DAYPERIOD_AM &&
1993 periodType != DayPeriodRules::DAYPERIOD_PM &&
1994 periodType != DayPeriodRules::DAYPERIOD_MIDNIGHT) {
1995 index = (int32_t)periodType;
1996 if (count <= 3) {
1997 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short
1998 } else if (count == 4 || count > 5) {
1999 toAppend = &fSymbols->fWideDayPeriods[index];
2000 } else { // count == 5
2001 toAppend = &fSymbols->fNarrowDayPeriods[index];
2002 }
2003 }
2004
2005 // Fallback schedule:
2006 // Midnight/Noon -> General Periods -> AM/PM.
2007
2008 // Midnight/Noon -> General Periods.
2009 if ((toAppend == nullptr || toAppend->isBogus()) &&
2010 (periodType == DayPeriodRules::DAYPERIOD_MIDNIGHT ||
2011 periodType == DayPeriodRules::DAYPERIOD_NOON)) {
2012 periodType = ruleSet->getDayPeriodForHour(hour);
2013 index = (int32_t)periodType;
2014
2015 if (count <= 3) {
2016 toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short
2017 } else if (count == 4 || count > 5) {
2018 toAppend = &fSymbols->fWideDayPeriods[index];
2019 } else { // count == 5
2020 toAppend = &fSymbols->fNarrowDayPeriods[index];
2021 }
2022 }
2023
2024 // General Periods -> AM/PM.
2025 if (periodType == DayPeriodRules::DAYPERIOD_AM ||
2026 periodType == DayPeriodRules::DAYPERIOD_PM ||
2027 toAppend->isBogus()) {
2028 // We are passing a different fieldToOutput because we want to add
2029 // 'B' to field position iterator. This makes this fallback stable when
2030 // there is a data change on locales.
2031 subFormat(appendTo, u'a', count, capitalizationContext, fieldNum, u'B', handler, cal, status);
2032 return;
2033 }
2034 else {
2035 appendTo += *toAppend;
2036 }
2037
2038 break;
2039 }
2040
2041 // all of the other pattern symbols can be formatted as simple numbers with
2042 // appropriate zero padding
2043 default:
2044 zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
2045 break;
2046 }
2047 #if !UCONFIG_NO_BREAK_ITERATION
2048 // if first field, check to see whether we need to and are able to titlecase it
2049 if (fieldNum == 0 && fCapitalizationBrkIter != nullptr && appendTo.length() > beginOffset &&
2050 u_islower(appendTo.char32At(beginOffset))) {
2051 UBool titlecase = false;
2052 switch (capitalizationContext) {
2053 case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE:
2054 titlecase = true;
2055 break;
2056 case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU:
2057 titlecase = fSymbols->fCapitalization[capContextUsageType][0];
2058 break;
2059 case UDISPCTX_CAPITALIZATION_FOR_STANDALONE:
2060 titlecase = fSymbols->fCapitalization[capContextUsageType][1];
2061 break;
2062 default:
2063 // titlecase = false;
2064 break;
2065 }
2066 if (titlecase) {
2067 BreakIterator* const mutableCapitalizationBrkIter = fCapitalizationBrkIter->clone();
2068 UnicodeString firstField(appendTo, beginOffset);
2069 firstField.toTitle(mutableCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
2070 appendTo.replaceBetween(beginOffset, appendTo.length(), firstField);
2071 delete mutableCapitalizationBrkIter;
2072 }
2073 }
2074 #endif
2075
2076 handler.addAttribute(DateFormatSymbols::getPatternCharIndex(fieldToOutput), beginOffset, appendTo.length());
2077 }
2078
2079 //----------------------------------------------------------------------
2080
adoptNumberFormat(NumberFormat * formatToAdopt)2081 void SimpleDateFormat::adoptNumberFormat(NumberFormat *formatToAdopt) {
2082 // Null out the fast formatter, it references fNumberFormat which we're
2083 // about to invalidate
2084 delete fSimpleNumberFormatter;
2085 fSimpleNumberFormatter = nullptr;
2086
2087 fixNumberFormatForDates(*formatToAdopt);
2088 delete fNumberFormat;
2089 fNumberFormat = formatToAdopt;
2090
2091 // We successfully set the default number format. Now delete the overrides
2092 // (can't fail).
2093 if (fSharedNumberFormatters) {
2094 freeSharedNumberFormatters(fSharedNumberFormatters);
2095 fSharedNumberFormatters = nullptr;
2096 }
2097
2098 // Recompute fSimpleNumberFormatter if necessary
2099 UErrorCode localStatus = U_ZERO_ERROR;
2100 initSimpleNumberFormatter(localStatus);
2101 }
2102
adoptNumberFormat(const UnicodeString & fields,NumberFormat * formatToAdopt,UErrorCode & status)2103 void SimpleDateFormat::adoptNumberFormat(const UnicodeString& fields, NumberFormat *formatToAdopt, UErrorCode &status){
2104 fixNumberFormatForDates(*formatToAdopt);
2105 LocalPointer<NumberFormat> fmt(formatToAdopt);
2106 if (U_FAILURE(status)) {
2107 return;
2108 }
2109
2110 // We must ensure fSharedNumberFormatters is allocated.
2111 if (fSharedNumberFormatters == nullptr) {
2112 fSharedNumberFormatters = allocSharedNumberFormatters();
2113 if (fSharedNumberFormatters == nullptr) {
2114 status = U_MEMORY_ALLOCATION_ERROR;
2115 return;
2116 }
2117 }
2118 const SharedNumberFormat *newFormat = createSharedNumberFormat(fmt.orphan());
2119 if (newFormat == nullptr) {
2120 status = U_MEMORY_ALLOCATION_ERROR;
2121 return;
2122 }
2123 for (int i=0; i<fields.length(); i++) {
2124 char16_t field = fields.charAt(i);
2125 // if the pattern character is unrecognized, signal an error and bail out
2126 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(field);
2127 if (patternCharIndex == UDAT_FIELD_COUNT) {
2128 status = U_INVALID_FORMAT_ERROR;
2129 newFormat->deleteIfZeroRefCount();
2130 return;
2131 }
2132
2133 // Set the number formatter in the table
2134 SharedObject::copyPtr(
2135 newFormat, fSharedNumberFormatters[patternCharIndex]);
2136 }
2137 newFormat->deleteIfZeroRefCount();
2138 }
2139
2140 const NumberFormat *
getNumberFormatForField(char16_t field) const2141 SimpleDateFormat::getNumberFormatForField(char16_t field) const {
2142 UDateFormatField index = DateFormatSymbols::getPatternCharIndex(field);
2143 if (index == UDAT_FIELD_COUNT) {
2144 return nullptr;
2145 }
2146 return getNumberFormatByIndex(index);
2147 }
2148
2149 //----------------------------------------------------------------------
2150 void
zeroPaddingNumber(const NumberFormat * currentNumberFormat,UnicodeString & appendTo,int32_t value,int32_t minDigits,int32_t maxDigits) const2151 SimpleDateFormat::zeroPaddingNumber(
2152 const NumberFormat *currentNumberFormat,
2153 UnicodeString &appendTo,
2154 int32_t value, int32_t minDigits, int32_t maxDigits) const
2155 {
2156
2157 if (currentNumberFormat == fNumberFormat && fSimpleNumberFormatter) {
2158 // Can use fast path
2159 // We create UFormattedNumberData ourselves to avoid a heap allocation
2160 // and corresponding free. Set the pointer to null afterwards to prevent
2161 // the implementation from attempting to free it.
2162 UErrorCode localStatus = U_ZERO_ERROR;
2163 number::impl::UFormattedNumberData data;
2164 data.quantity.setToLong(value);
2165 number::SimpleNumber number(&data, localStatus);
2166 number.setMinimumIntegerDigits(minDigits, localStatus);
2167 number.setMaximumIntegerDigits(maxDigits, localStatus);
2168
2169 number::FormattedNumber result = fSimpleNumberFormatter->format(std::move(number), localStatus);
2170 if (U_FAILURE(localStatus)) {
2171 result.fData = nullptr;
2172 return;
2173 }
2174 UnicodeStringAppendable appendable(appendTo);
2175 result.appendTo(appendable, localStatus);
2176 result.fData = nullptr;
2177 return;
2178 }
2179
2180 // Check for RBNF (no clone necessary)
2181 const auto* rbnf = dynamic_cast<const RuleBasedNumberFormat*>(currentNumberFormat);
2182 if (rbnf != nullptr) {
2183 FieldPosition pos(FieldPosition::DONT_CARE);
2184 rbnf->format(value, appendTo, pos); // 3rd arg is there to speed up processing
2185 return;
2186 }
2187
2188 // Fall back to slow path (clone and mutate the NumberFormat)
2189 if (currentNumberFormat != nullptr) {
2190 FieldPosition pos(FieldPosition::DONT_CARE);
2191 LocalPointer<NumberFormat> nf(currentNumberFormat->clone());
2192 nf->setMinimumIntegerDigits(minDigits);
2193 nf->setMaximumIntegerDigits(maxDigits);
2194 nf->format(value, appendTo, pos); // 3rd arg is there to speed up processing
2195 }
2196 }
2197
2198 //----------------------------------------------------------------------
2199
2200 /**
2201 * Return true if the given format character, occurring count
2202 * times, represents a numeric field.
2203 */
isNumeric(char16_t formatChar,int32_t count)2204 UBool SimpleDateFormat::isNumeric(char16_t formatChar, int32_t count) {
2205 return DateFormatSymbols::isNumericPatternChar(formatChar, count);
2206 }
2207
2208 UBool
isAtNumericField(const UnicodeString & pattern,int32_t patternOffset)2209 SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2210 if (patternOffset >= pattern.length()) {
2211 // not at any field
2212 return false;
2213 }
2214 char16_t ch = pattern.charAt(patternOffset);
2215 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2216 if (f == UDAT_FIELD_COUNT) {
2217 // not at any field
2218 return false;
2219 }
2220 int32_t i = patternOffset;
2221 while (pattern.charAt(++i) == ch) {}
2222 return DateFormatSymbols::isNumericField(f, i - patternOffset);
2223 }
2224
2225 UBool
isAfterNonNumericField(const UnicodeString & pattern,int32_t patternOffset)2226 SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) {
2227 if (patternOffset <= 0) {
2228 // not after any field
2229 return false;
2230 }
2231 char16_t ch = pattern.charAt(--patternOffset);
2232 UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch);
2233 if (f == UDAT_FIELD_COUNT) {
2234 // not after any field
2235 return false;
2236 }
2237 int32_t i = patternOffset;
2238 while (pattern.charAt(--i) == ch) {}
2239 return !DateFormatSymbols::isNumericField(f, patternOffset - i);
2240 }
2241
2242 void
parse(const UnicodeString & text,Calendar & cal,ParsePosition & parsePos) const2243 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
2244 {
2245 UErrorCode status = U_ZERO_ERROR;
2246 int32_t pos = parsePos.getIndex();
2247 if(parsePos.getIndex() < 0) {
2248 parsePos.setErrorIndex(0);
2249 return;
2250 }
2251 int32_t start = pos;
2252
2253 // Hold the day period until everything else is parsed, because we need
2254 // the hour to interpret time correctly.
2255 int32_t dayPeriodInt = -1;
2256
2257 UBool ambiguousYear[] = { false };
2258 int32_t saveHebrewMonth = -1;
2259 int32_t count = 0;
2260 UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN;
2261
2262 // For parsing abutting numeric fields. 'abutPat' is the
2263 // offset into 'pattern' of the first of 2 or more abutting
2264 // numeric fields. 'abutStart' is the offset into 'text'
2265 // where parsing the fields begins. 'abutPass' starts off as 0
2266 // and increments each time we try to parse the fields.
2267 int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
2268 int32_t abutStart = 0;
2269 int32_t abutPass = 0;
2270 UBool inQuote = false;
2271
2272 MessageFormat * numericLeapMonthFormatter = nullptr;
2273
2274 Calendar* calClone = nullptr;
2275 Calendar *workCal = &cal;
2276 if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
2277 // Different calendar type
2278 // We use the time/zone from the input calendar, but
2279 // do not use the input calendar for field calculation.
2280 calClone = fCalendar->clone();
2281 if (calClone != nullptr) {
2282 calClone->setTime(cal.getTime(status),status);
2283 if (U_FAILURE(status)) {
2284 goto ExitParse;
2285 }
2286 calClone->setTimeZone(cal.getTimeZone());
2287 workCal = calClone;
2288 } else {
2289 status = U_MEMORY_ALLOCATION_ERROR;
2290 goto ExitParse;
2291 }
2292 }
2293
2294 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
2295 numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, status);
2296 if (numericLeapMonthFormatter == nullptr) {
2297 status = U_MEMORY_ALLOCATION_ERROR;
2298 goto ExitParse;
2299 } else if (U_FAILURE(status)) {
2300 goto ExitParse; // this will delete numericLeapMonthFormatter
2301 }
2302 }
2303
2304 for (int32_t i=0; i<fPattern.length(); ++i) {
2305 char16_t ch = fPattern.charAt(i);
2306
2307 // Handle alphabetic field characters.
2308 if (!inQuote && isSyntaxChar(ch)) {
2309 int32_t fieldPat = i;
2310
2311 // Count the length of this field specifier
2312 count = 1;
2313 while ((i+1)<fPattern.length() &&
2314 fPattern.charAt(i+1) == ch) {
2315 ++count;
2316 ++i;
2317 }
2318
2319 if (isNumeric(ch, count)) {
2320 if (abutPat < 0) {
2321 // Determine if there is an abutting numeric field.
2322 // Record the start of a set of abutting numeric fields.
2323 if (isAtNumericField(fPattern, i + 1)) {
2324 abutPat = fieldPat;
2325 abutStart = pos;
2326 abutPass = 0;
2327 }
2328 }
2329 } else {
2330 abutPat = -1; // End of any abutting fields
2331 }
2332
2333 // Handle fields within a run of abutting numeric fields. Take
2334 // the pattern "HHmmss" as an example. We will try to parse
2335 // 2/2/2 characters of the input text, then if that fails,
2336 // 1/2/2. We only adjust the width of the leftmost field; the
2337 // others remain fixed. This allows "123456" => 12:34:56, but
2338 // "12345" => 1:23:45. Likewise, for the pattern "yyyyMMdd" we
2339 // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
2340 if (abutPat >= 0) {
2341 // If we are at the start of a run of abutting fields, then
2342 // shorten this field in each pass. If we can't shorten
2343 // this field any more, then the parse of this set of
2344 // abutting numeric fields has failed.
2345 if (fieldPat == abutPat) {
2346 count -= abutPass++;
2347 if (count == 0) {
2348 status = U_PARSE_ERROR;
2349 goto ExitParse;
2350 }
2351 }
2352
2353 pos = subParse(text, pos, ch, count,
2354 true, false, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType);
2355
2356 // If the parse fails anywhere in the run, back up to the
2357 // start of the run and retry.
2358 if (pos < 0) {
2359 i = abutPat - 1;
2360 pos = abutStart;
2361 continue;
2362 }
2363 }
2364
2365 // Handle non-numeric fields and non-abutting numeric
2366 // fields.
2367 else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored
2368 int32_t s = subParse(text, pos, ch, count,
2369 false, true, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType, &dayPeriodInt);
2370
2371 if (s == -pos-1) {
2372 // era not present, in special cases allow this to continue
2373 // from the position where the era was expected
2374 s = pos;
2375
2376 if (i+1 < fPattern.length()) {
2377 // move to next pattern character
2378 char16_t c = fPattern.charAt(i+1);
2379
2380 // check for whitespace
2381 if (PatternProps::isWhiteSpace(c)) {
2382 i++;
2383 // Advance over run in pattern
2384 while ((i+1)<fPattern.length() &&
2385 PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
2386 ++i;
2387 }
2388 }
2389 }
2390 }
2391 else if (s <= 0) {
2392 status = U_PARSE_ERROR;
2393 goto ExitParse;
2394 }
2395 pos = s;
2396 }
2397 }
2398
2399 // Handle literal pattern characters. These are any
2400 // quoted characters and non-alphabetic unquoted
2401 // characters.
2402 else {
2403
2404 abutPat = -1; // End of any abutting fields
2405
2406 if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status), getBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, status), isLenient())) {
2407 status = U_PARSE_ERROR;
2408 goto ExitParse;
2409 }
2410 }
2411 }
2412
2413 // Special hack for trailing "." after non-numeric field.
2414 if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
2415 // only do if the last field is not numeric
2416 if (isAfterNonNumericField(fPattern, fPattern.length())) {
2417 pos++; // skip the extra "."
2418 }
2419 }
2420
2421 // If dayPeriod is set, use it in conjunction with hour-of-day to determine am/pm.
2422 if (dayPeriodInt >= 0) {
2423 DayPeriodRules::DayPeriod dayPeriod = (DayPeriodRules::DayPeriod)dayPeriodInt;
2424 const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status);
2425
2426 if (!cal.isSet(UCAL_HOUR) && !cal.isSet(UCAL_HOUR_OF_DAY)) {
2427 // If hour is not set, set time to the midpoint of current day period, overwriting
2428 // minutes if it's set.
2429 double midPoint = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2430
2431 // If we can't get midPoint we do nothing.
2432 if (U_SUCCESS(status)) {
2433 // Truncate midPoint toward zero to get the hour.
2434 // Any leftover means it was a half-hour.
2435 int32_t midPointHour = (int32_t) midPoint;
2436 int32_t midPointMinute = (midPoint - midPointHour) > 0 ? 30 : 0;
2437
2438 // No need to set am/pm because hour-of-day is set last therefore takes precedence.
2439 cal.set(UCAL_HOUR_OF_DAY, midPointHour);
2440 cal.set(UCAL_MINUTE, midPointMinute);
2441 }
2442 } else {
2443 int hourOfDay;
2444
2445 if (cal.isSet(UCAL_HOUR_OF_DAY)) { // Hour is parsed in 24-hour format.
2446 hourOfDay = cal.get(UCAL_HOUR_OF_DAY, status);
2447 } else { // Hour is parsed in 12-hour format.
2448 hourOfDay = cal.get(UCAL_HOUR, status);
2449 // cal.get() turns 12 to 0 for 12-hour time; change 0 to 12
2450 // so 0 unambiguously means a 24-hour time from above.
2451 if (hourOfDay == 0) { hourOfDay = 12; }
2452 }
2453 U_ASSERT(0 <= hourOfDay && hourOfDay <= 23);
2454
2455
            // If hour-of-day is 0 or 13 thru 23 then input time is unambiguously in 24-hour format.
2457 if (hourOfDay == 0 || (13 <= hourOfDay && hourOfDay <= 23)) {
2458 // Make hour-of-day take precedence over (hour + am/pm) by setting it again.
2459 cal.set(UCAL_HOUR_OF_DAY, hourOfDay);
2460 } else {
2461 // We have a 12-hour time and need to choose between am and pm.
2462 // Behave as if dayPeriod spanned 6 hours each way from its center point.
2463 // This will parse correctly for consistent time + period (e.g. 10 at night) as
2464 // well as provide a reasonable recovery for inconsistent time + period (e.g.
2465 // 9 in the afternoon).
2466
2467 // Assume current time is in the AM.
2468 // - Change 12 back to 0 for easier handling of 12am.
2469 // - Append minutes as fractional hours because e.g. 8:15 and 8:45 could be parsed
2470 // into different half-days if center of dayPeriod is at 14:30.
2471 // - cal.get(MINUTE) will return 0 if MINUTE is unset, which works.
2472 if (hourOfDay == 12) { hourOfDay = 0; }
2473 double currentHour = hourOfDay + (cal.get(UCAL_MINUTE, status)) / 60.0;
2474 double midPointHour = ruleSet->getMidPointForDayPeriod(dayPeriod, status);
2475
2476 if (U_SUCCESS(status)) {
2477 double hoursAheadMidPoint = currentHour - midPointHour;
2478
2479 // Assume current time is in the AM.
2480 if (-6 <= hoursAheadMidPoint && hoursAheadMidPoint < 6) {
2481 // Assumption holds; set time as such.
2482 cal.set(UCAL_AM_PM, 0);
2483 } else {
2484 cal.set(UCAL_AM_PM, 1);
2485 }
2486 }
2487 }
2488 }
2489 }
2490
2491 // At this point the fields of Calendar have been set. Calendar
2492 // will fill in default values for missing fields when the time
2493 // is computed.
2494
2495 parsePos.setIndex(pos);
2496
2497 // This part is a problem: When we call parsedDate.after, we compute the time.
2498 // Take the date April 3 2004 at 2:30 am. When this is first set up, the year
2499 // will be wrong if we're parsing a 2-digit year pattern. It will be 1904.
2500 // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day. 2:30 am
2501 // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
2502 // on that day. It is therefore parsed out to fields as 3:30 am. Then we
2503 // add 100 years, and get April 3 2004 at 3:30 am. Note that April 3 2004 is
2504 // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
2505 /*
2506 UDate parsedDate = calendar.getTime();
2507 if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
2508 calendar.add(Calendar.YEAR, 100);
2509 parsedDate = calendar.getTime();
2510 }
2511 */
2512 // Because of the above condition, save off the fields in case we need to readjust.
2513 // The procedure we use here is not particularly efficient, but there is no other
2514 // way to do this given the API restrictions present in Calendar. We minimize
2515 // inefficiency by only performing this computation when it might apply, that is,
2516 // when the two-digit year is equal to the start year, and thus might fall at the
2517 // front or the back of the default century. This only works because we adjust
2518 // the year correctly to start with in other cases -- see subParse().
2519 if (ambiguousYear[0] || tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) // If this is true then the two-digit year == the default start year
2520 {
2521 // We need a copy of the fields, and we need to avoid triggering a call to
2522 // complete(), which will recalculate the fields. Since we can't access
2523 // the fields[] array in Calendar, we clone the entire object. This will
2524 // stop working if Calendar.clone() is ever rewritten to call complete().
2525 Calendar *copy;
2526 if (ambiguousYear[0]) {
2527 copy = cal.clone();
2528 // Check for failed cloning.
2529 if (copy == nullptr) {
2530 status = U_MEMORY_ALLOCATION_ERROR;
2531 goto ExitParse;
2532 }
2533 UDate parsedDate = copy->getTime(status);
2534 // {sfb} check internalGetDefaultCenturyStart
2535 if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
2536 // We can't use add here because that does a complete() first.
2537 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
2538 }
2539 delete copy;
2540 }
2541
2542 if (tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) {
2543 copy = cal.clone();
2544 // Check for failed cloning.
2545 if (copy == nullptr) {
2546 status = U_MEMORY_ALLOCATION_ERROR;
2547 goto ExitParse;
2548 }
2549 const TimeZone & tz = cal.getTimeZone();
2550 BasicTimeZone *btz = nullptr;
2551
2552 if (dynamic_cast<const OlsonTimeZone *>(&tz) != nullptr
2553 || dynamic_cast<const SimpleTimeZone *>(&tz) != nullptr
2554 || dynamic_cast<const RuleBasedTimeZone *>(&tz) != nullptr
2555 || dynamic_cast<const VTimeZone *>(&tz) != nullptr) {
2556 btz = (BasicTimeZone*)&tz;
2557 }
2558
2559 // Get local millis
2560 copy->set(UCAL_ZONE_OFFSET, 0);
2561 copy->set(UCAL_DST_OFFSET, 0);
2562 UDate localMillis = copy->getTime(status);
2563
2564 // Make sure parsed time zone type (Standard or Daylight)
2565 // matches the rule used by the parsed time zone.
2566 int32_t raw, dst;
2567 if (btz != nullptr) {
2568 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2569 btz->getOffsetFromLocal(localMillis,
2570 UCAL_TZ_LOCAL_STANDARD_FORMER, UCAL_TZ_LOCAL_STANDARD_LATTER, raw, dst, status);
2571 } else {
2572 btz->getOffsetFromLocal(localMillis,
2573 UCAL_TZ_LOCAL_DAYLIGHT_FORMER, UCAL_TZ_LOCAL_DAYLIGHT_LATTER, raw, dst, status);
2574 }
2575 } else {
                // There is no good way to resolve an ambiguous time at a transition,
                // but the following code works in most cases.
2578 tz.getOffset(localMillis, true, raw, dst, status);
2579 }
2580
2581 // Now, compare the results with parsed type, either standard or daylight saving time
2582 int32_t resolvedSavings = dst;
2583 if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) {
2584 if (dst != 0) {
2585 // Override DST_OFFSET = 0 in the result calendar
2586 resolvedSavings = 0;
2587 }
2588 } else { // tztype == TZTYPE_DST
2589 if (dst == 0) {
2590 if (btz != nullptr) {
2591 // This implementation resolves daylight saving time offset
2592 // closest rule after the given time.
2593 UDate baseTime = localMillis + raw;
2594 UDate time = baseTime;
2595 UDate limit = baseTime + MAX_DAYLIGHT_DETECTION_RANGE;
2596 TimeZoneTransition trs;
2597 UBool trsAvail;
2598
2599 // Search for DST rule after the given time
2600 while (time < limit) {
2601 trsAvail = btz->getNextTransition(time, false, trs);
2602 if (!trsAvail) {
2603 break;
2604 }
2605 resolvedSavings = trs.getTo()->getDSTSavings();
2606 if (resolvedSavings != 0) {
2607 break;
2608 }
2609 time = trs.getTime();
2610 }
2611
2612 if (resolvedSavings == 0) {
2613 // If no DST rule after the given time was found, search for
2614 // DST rule before.
2615 time = baseTime;
2616 limit = baseTime - MAX_DAYLIGHT_DETECTION_RANGE;
2617 while (time > limit) {
2618 trsAvail = btz->getPreviousTransition(time, true, trs);
2619 if (!trsAvail) {
2620 break;
2621 }
2622 resolvedSavings = trs.getFrom()->getDSTSavings();
2623 if (resolvedSavings != 0) {
2624 break;
2625 }
2626 time = trs.getTime() - 1;
2627 }
2628
2629 if (resolvedSavings == 0) {
2630 resolvedSavings = btz->getDSTSavings();
2631 }
2632 }
2633 } else {
2634 resolvedSavings = tz.getDSTSavings();
2635 }
2636 if (resolvedSavings == 0) {
2637 // final fallback
2638 resolvedSavings = U_MILLIS_PER_HOUR;
2639 }
2640 }
2641 }
2642 cal.set(UCAL_ZONE_OFFSET, raw);
2643 cal.set(UCAL_DST_OFFSET, resolvedSavings);
2644 delete copy;
2645 }
2646 }
2647 ExitParse:
2648 // Set the parsed result if local calendar is used
2649 // instead of the input calendar
2650 if (U_SUCCESS(status) && workCal != &cal) {
2651 cal.setTimeZone(workCal->getTimeZone());
2652 cal.setTime(workCal->getTime(status), status);
2653 }
2654
2655 delete numericLeapMonthFormatter;
2656 delete calClone;
2657
2658 // If any Calendar calls failed, we pretend that we
2659 // couldn't parse the string, when in reality this isn't quite accurate--
2660 // we did parse it; the Calendar calls just failed.
2661 if (U_FAILURE(status)) {
2662 parsePos.setErrorIndex(pos);
2663 parsePos.setIndex(start);
2664 }
2665 }
2666
2667 //----------------------------------------------------------------------
2668
2669 static int32_t
2670 matchStringWithOptionalDot(const UnicodeString &text,
2671 int32_t index,
2672 const UnicodeString &data);
2673
matchQuarterString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,Calendar & cal) const2674 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
2675 int32_t start,
2676 UCalendarDateFields field,
2677 const UnicodeString* data,
2678 int32_t dataCount,
2679 Calendar& cal) const
2680 {
2681 int32_t i = 0;
2682 int32_t count = dataCount;
2683
2684 // There may be multiple strings in the data[] array which begin with
2685 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2686 // We keep track of the longest match, and return that. Note that this
2687 // unfortunately requires us to test all array elements.
2688 int32_t bestMatchLength = 0, bestMatch = -1;
2689 UnicodeString bestMatchName;
2690
2691 for (; i < count; ++i) {
2692 int32_t matchLength = 0;
2693 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2694 bestMatchLength = matchLength;
2695 bestMatch = i;
2696 }
2697 }
2698
2699 if (bestMatch >= 0) {
2700 cal.set(field, bestMatch * 3);
2701 return start + bestMatchLength;
2702 }
2703
2704 return -start;
2705 }
2706
matchDayPeriodStrings(const UnicodeString & text,int32_t start,const UnicodeString * data,int32_t dataCount,int32_t & dayPeriod) const2707 int32_t SimpleDateFormat::matchDayPeriodStrings(const UnicodeString& text, int32_t start,
2708 const UnicodeString* data, int32_t dataCount,
2709 int32_t &dayPeriod) const
2710 {
2711
2712 int32_t bestMatchLength = 0, bestMatch = -1;
2713
2714 for (int32_t i = 0; i < dataCount; ++i) {
2715 int32_t matchLength = 0;
2716 if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2717 bestMatchLength = matchLength;
2718 bestMatch = i;
2719 }
2720 }
2721
2722 if (bestMatch >= 0) {
2723 dayPeriod = bestMatch;
2724 return start + bestMatchLength;
2725 }
2726
2727 return -start;
2728 }
2729
2730 //----------------------------------------------------------------------
matchLiterals(const UnicodeString & pattern,int32_t & patternOffset,const UnicodeString & text,int32_t & textOffset,UBool whitespaceLenient,UBool partialMatchLenient,UBool oldLeniency)2731 UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
2732 int32_t &patternOffset,
2733 const UnicodeString &text,
2734 int32_t &textOffset,
2735 UBool whitespaceLenient,
2736 UBool partialMatchLenient,
2737 UBool oldLeniency)
2738 {
2739 UBool inQuote = false;
2740 UnicodeString literal;
2741 int32_t i = patternOffset;
2742
2743 // scan pattern looking for contiguous literal characters
2744 for ( ; i < pattern.length(); i += 1) {
2745 char16_t ch = pattern.charAt(i);
2746
2747 if (!inQuote && isSyntaxChar(ch)) {
2748 break;
2749 }
2750
2751 if (ch == QUOTE) {
2752 // Match a quote literal ('') inside OR outside of quotes
2753 if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
2754 i += 1;
2755 } else {
2756 inQuote = !inQuote;
2757 continue;
2758 }
2759 }
2760
2761 literal += ch;
2762 }
2763
2764 // at this point, literal contains the literal text
2765 // and i is the index of the next non-literal pattern character.
2766 int32_t p;
2767 int32_t t = textOffset;
2768
2769 if (whitespaceLenient) {
2770 // trim leading, trailing whitespace from
2771 // the literal text
2772 literal.trim();
2773
2774 // ignore any leading whitespace in the text
2775 while (t < text.length() && u_isWhitespace(text.charAt(t))) {
2776 t += 1;
2777 }
2778 }
2779
2780 for (p = 0; p < literal.length() && t < text.length();) {
2781 UBool needWhitespace = false;
2782
2783 while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) {
2784 needWhitespace = true;
2785 p += 1;
2786 }
2787
2788 if (needWhitespace) {
2789 int32_t tStart = t;
2790
2791 while (t < text.length()) {
2792 char16_t tch = text.charAt(t);
2793
2794 if (!u_isUWhiteSpace(tch) && !PatternProps::isWhiteSpace(tch)) {
2795 break;
2796 }
2797
2798 t += 1;
2799 }
2800
2801 // TODO: should we require internal spaces
2802 // in lenient mode? (There won't be any
2803 // leading or trailing spaces)
2804 if (!whitespaceLenient && t == tStart) {
2805 // didn't find matching whitespace:
2806 // an error in strict mode
2807 return false;
2808 }
2809
2810 // In strict mode, this run of whitespace
2811 // may have been at the end.
2812 if (p >= literal.length()) {
2813 break;
2814 }
2815 }
2816 if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
2817 // Ran out of text, or found a non-matching character:
2818 // OK in lenient mode, an error in strict mode.
2819 if (whitespaceLenient) {
2820 if (t == textOffset && text.charAt(t) == 0x2e &&
2821 isAfterNonNumericField(pattern, patternOffset)) {
2822 // Lenient mode and the literal input text begins with a "." and
2823 // we are after a non-numeric field: We skip the "."
2824 ++t;
2825 continue; // Do not update p.
2826 }
2827 // if it is actual whitespace and we're whitespace lenient it's OK
2828
2829 char16_t wsc = text.charAt(t);
2830 if(PatternProps::isWhiteSpace(wsc)) {
2831 // Lenient mode and it's just whitespace we skip it
2832 ++t;
2833 continue; // Do not update p.
2834 }
2835 }
2836 // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for partial matches
2837 if(partialMatchLenient && oldLeniency) {
2838 break;
2839 }
2840
2841 return false;
2842 }
2843 ++p;
2844 ++t;
2845 }
2846
2847 // At this point if we're in strict mode we have a complete match.
2848 // If we're in lenient mode we may have a partial match, or no
2849 // match at all.
2850 if (p <= 0) {
2851 // no match. Pretend it matched a run of whitespace
2852 // and ignorables in the text.
2853 const UnicodeSet *ignorables = nullptr;
2854 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i));
2855 if (patternCharIndex != UDAT_FIELD_COUNT) {
2856 ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
2857 }
2858
2859 for (t = textOffset; t < text.length(); t += 1) {
2860 char16_t ch = text.charAt(t);
2861
2862 if (ignorables == nullptr || !ignorables->contains(ch)) {
2863 break;
2864 }
2865 }
2866 }
2867
2868 // if we get here, we've got a complete match.
2869 patternOffset = i - 1;
2870 textOffset = t;
2871
2872 return true;
2873 }
2874
2875 //----------------------------------------------------------------------
2876 // check both wide and abbrev months.
2877 // Does not currently handle monthPattern.
2878 // UCalendarDateFields field = UCAL_MONTH
2879
matchAlphaMonthStrings(const UnicodeString & text,int32_t start,const UnicodeString * wideData,const UnicodeString * shortData,int32_t dataCount,Calendar & cal) const2880 int32_t SimpleDateFormat::matchAlphaMonthStrings(const UnicodeString& text,
2881 int32_t start,
2882 const UnicodeString* wideData,
2883 const UnicodeString* shortData,
2884 int32_t dataCount,
2885 Calendar& cal) const
2886 {
2887 int32_t i;
2888 int32_t bestMatchLength = 0, bestMatch = -1;
2889
2890 for (i = 0; i < dataCount; ++i) {
2891 int32_t matchLen = 0;
2892 if ((matchLen = matchStringWithOptionalDot(text, start, wideData[i])) > bestMatchLength) {
2893 bestMatch = i;
2894 bestMatchLength = matchLen;
2895 }
2896 }
2897 for (i = 0; i < dataCount; ++i) {
2898 int32_t matchLen = 0;
2899 if ((matchLen = matchStringWithOptionalDot(text, start, shortData[i])) > bestMatchLength) {
2900 bestMatch = i;
2901 bestMatchLength = matchLen;
2902 }
2903 }
2904
2905 if (bestMatch >= 0) {
2906 // Adjustment for Hebrew Calendar month Adar II
2907 if (!strcmp(cal.getType(),"hebrew") && bestMatch==13) {
2908 cal.set(UCAL_MONTH,6);
2909 } else {
2910 cal.set(UCAL_MONTH, bestMatch);
2911 }
2912 return start + bestMatchLength;
2913 }
2914
2915 return -start;
2916 }
2917
2918 //----------------------------------------------------------------------
2919
matchString(const UnicodeString & text,int32_t start,UCalendarDateFields field,const UnicodeString * data,int32_t dataCount,const UnicodeString * monthPattern,Calendar & cal) const2920 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
2921 int32_t start,
2922 UCalendarDateFields field,
2923 const UnicodeString* data,
2924 int32_t dataCount,
2925 const UnicodeString* monthPattern,
2926 Calendar& cal) const
2927 {
2928 int32_t i = 0;
2929 int32_t count = dataCount;
2930
2931 if (field == UCAL_DAY_OF_WEEK) i = 1;
2932
2933 // There may be multiple strings in the data[] array which begin with
2934 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2935 // We keep track of the longest match, and return that. Note that this
2936 // unfortunately requires us to test all array elements.
2937 // But this does not really work for cases such as Chuvash in which
2938 // May is "ҫу" and August is "ҫурла"/"ҫур.", hence matchAlphaMonthStrings.
2939 int32_t bestMatchLength = 0, bestMatch = -1;
2940 UnicodeString bestMatchName;
2941 int32_t isLeapMonth = 0;
2942
2943 for (; i < count; ++i) {
2944 int32_t matchLen = 0;
2945 if ((matchLen = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) {
2946 bestMatch = i;
2947 bestMatchLength = matchLen;
2948 }
2949
2950 if (monthPattern != nullptr) {
2951 UErrorCode status = U_ZERO_ERROR;
2952 UnicodeString leapMonthName;
2953 SimpleFormatter(*monthPattern, 1, 1, status).format(data[i], leapMonthName, status);
2954 if (U_SUCCESS(status)) {
2955 if ((matchLen = matchStringWithOptionalDot(text, start, leapMonthName)) > bestMatchLength) {
2956 bestMatch = i;
2957 bestMatchLength = matchLen;
2958 isLeapMonth = 1;
2959 }
2960 }
2961 }
2962 }
2963
2964 if (bestMatch >= 0) {
2965 if (field < UCAL_FIELD_COUNT) {
2966 // Adjustment for Hebrew Calendar month Adar II
2967 if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) {
2968 cal.set(field,6);
2969 } else {
2970 if (field == UCAL_YEAR) {
2971 bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60
2972 }
2973 cal.set(field, bestMatch);
2974 }
2975 if (monthPattern != nullptr) {
2976 cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth);
2977 }
2978 }
2979
2980 return start + bestMatchLength;
2981 }
2982
2983 return -start;
2984 }
2985
2986 static int32_t
matchStringWithOptionalDot(const UnicodeString & text,int32_t index,const UnicodeString & data)2987 matchStringWithOptionalDot(const UnicodeString &text,
2988 int32_t index,
2989 const UnicodeString &data) {
2990 UErrorCode sts = U_ZERO_ERROR;
2991 int32_t matchLenText = 0;
2992 int32_t matchLenData = 0;
2993
2994 u_caseInsensitivePrefixMatch(text.getBuffer() + index, text.length() - index,
2995 data.getBuffer(), data.length(),
2996 0 /* default case option */,
2997 &matchLenText, &matchLenData,
2998 &sts);
2999 U_ASSERT (U_SUCCESS(sts));
3000
3001 if (matchLenData == data.length() /* normal match */
3002 || (data.charAt(data.length() - 1) == 0x2e
3003 && matchLenData == data.length() - 1 /* match without trailing dot */)) {
3004 return matchLenText;
3005 }
3006
3007 return 0;
3008 }
3009
3010 //----------------------------------------------------------------------
3011
3012 void
set2DigitYearStart(UDate d,UErrorCode & status)3013 SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
3014 {
3015 parseAmbiguousDatesAsAfter(d, status);
3016 }
3017
3018 /**
3019 * Private member function that converts the parsed date strings into
3020 * timeFields. Returns -start (for ParsePosition) if failed.
3021 */
subParse(const UnicodeString & text,int32_t & start,char16_t ch,int32_t count,UBool obeyCount,UBool allowNegative,UBool ambiguousYear[],int32_t & saveHebrewMonth,Calendar & cal,int32_t patLoc,MessageFormat * numericLeapMonthFormatter,UTimeZoneFormatTimeType * tzTimeType,int32_t * dayPeriod) const3022 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, char16_t ch, int32_t count,
3023 UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
3024 int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType,
3025 int32_t *dayPeriod) const
3026 {
3027 Formattable number;
3028 int32_t value = 0;
3029 int32_t i;
3030 int32_t ps = 0;
3031 UErrorCode status = U_ZERO_ERROR;
3032 ParsePosition pos(0);
3033 UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch);
3034 const NumberFormat *currentNumberFormat;
3035 UnicodeString temp;
3036 UBool gotNumber = false;
3037
3038 #if defined (U_DEBUG_CAL)
3039 //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start);
3040 #endif
3041
3042 if (patternCharIndex == UDAT_FIELD_COUNT) {
3043 return -start;
3044 }
3045
3046 currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
3047 if (currentNumberFormat == nullptr) {
3048 return -start;
3049 }
3050 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; // UCAL_FIELD_COUNT if irrelevant
3051 UnicodeString hebr("hebr", 4, US_INV);
3052
3053 if (numericLeapMonthFormatter != nullptr) {
3054 numericLeapMonthFormatter->setFormats((const Format **)¤tNumberFormat, 1);
3055 }
3056 UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0);
3057
3058 // If there are any spaces here, skip over them. If we hit the end
3059 // of the string, then fail.
3060 for (;;) {
3061 if (start >= text.length()) {
3062 return -start;
3063 }
3064 UChar32 c = text.char32At(start);
3065 if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
3066 break;
3067 }
3068 start += U16_LENGTH(c);
3069 }
3070 pos.setIndex(start);
3071
3072 // We handle a few special cases here where we need to parse
3073 // a number value. We handle further, more generic cases below. We need
3074 // to handle some of them here because some fields require extra processing on
3075 // the parsed value.
3076 if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k
3077 patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H
3078 patternCharIndex == UDAT_HOUR1_FIELD || // h
3079 patternCharIndex == UDAT_HOUR0_FIELD || // K
3080 (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e
3081 (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c
3082 (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M
3083 (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L
3084 (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q
3085 (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q
3086 patternCharIndex == UDAT_YEAR_FIELD || // y
3087 patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y
3088 patternCharIndex == UDAT_YEAR_NAME_FIELD || // U (falls back to numeric)
3089 (patternCharIndex == UDAT_ERA_FIELD && isChineseCalendar) || // G
3090 patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S
3091 {
3092 int32_t parseStart = pos.getIndex();
3093 // It would be good to unify this with the obeyCount logic below,
3094 // but that's going to be difficult.
3095 const UnicodeString* src;
3096
3097 UBool parsedNumericLeapMonth = false;
3098 if (numericLeapMonthFormatter != nullptr && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) {
3099 int32_t argCount;
3100 Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount);
3101 if (args != nullptr && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) {
3102 parsedNumericLeapMonth = true;
3103 number.setLong(args[0].getLong());
3104 cal.set(UCAL_IS_LEAP_MONTH, 1);
3105 delete[] args;
3106 } else {
3107 pos.setIndex(parseStart);
3108 cal.set(UCAL_IS_LEAP_MONTH, 0);
3109 }
3110 }
3111
3112 if (!parsedNumericLeapMonth) {
3113 if (obeyCount) {
3114 if ((start+count) > text.length()) {
3115 return -start;
3116 }
3117
3118 text.extractBetween(0, start + count, temp);
3119 src = &temp;
3120 } else {
3121 src = &text;
3122 }
3123
3124 parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3125 }
3126
3127 int32_t txtLoc = pos.getIndex();
3128
3129 if (txtLoc > parseStart) {
3130 value = number.getLong();
3131 gotNumber = true;
3132
3133 // suffix processing
3134 if (value < 0 ) {
3135 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, true);
3136 if (txtLoc != pos.getIndex()) {
3137 value *= -1;
3138 }
3139 }
3140 else {
3141 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, false);
3142 }
3143
3144 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) {
3145 // Check the range of the value
3146 int32_t bias = gFieldRangeBias[patternCharIndex];
3147 if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
3148 return -start;
3149 }
3150 }
3151
3152 pos.setIndex(txtLoc);
3153 }
3154 }
3155
3156 // Make sure that we got a number if
3157 // we want one, and didn't get one
3158 // if we don't want one.
3159 switch (patternCharIndex) {
3160 case UDAT_HOUR_OF_DAY1_FIELD:
3161 case UDAT_HOUR_OF_DAY0_FIELD:
3162 case UDAT_HOUR1_FIELD:
3163 case UDAT_HOUR0_FIELD:
3164 // special range check for hours:
3165 if (value < 0 || value > 24) {
3166 return -start;
3167 }
3168
3169 // fall through to gotNumber check
3170 U_FALLTHROUGH;
3171 case UDAT_YEAR_FIELD:
3172 case UDAT_YEAR_WOY_FIELD:
3173 case UDAT_FRACTIONAL_SECOND_FIELD:
3174 // these must be a number
3175 if (! gotNumber) {
3176 return -start;
3177 }
3178
3179 break;
3180
3181 default:
3182 // we check the rest of the fields below.
3183 break;
3184 }
3185
3186 switch (patternCharIndex) {
3187 case UDAT_ERA_FIELD:
3188 if (isChineseCalendar) {
3189 if (!gotNumber) {
3190 return -start;
3191 }
3192 cal.set(UCAL_ERA, value);
3193 return pos.getIndex();
3194 }
3195 if (count == 5) {
3196 ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, nullptr, cal);
3197 } else if (count == 4) {
3198 ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, nullptr, cal);
3199 } else {
3200 ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, nullptr, cal);
3201 }
3202
3203 // check return position, if it equals -start, then matchString error
3204 // special case the return code so we don't necessarily fail out until we
3205 // verify no year information also
3206 if (ps == -start)
3207 ps--;
3208
3209 return ps;
3210
3211 case UDAT_YEAR_FIELD:
3212 // If there are 3 or more YEAR pattern characters, this indicates
3213 // that the year value is to be treated literally, without any
3214 // two-digit year adjustments (e.g., from "01" to 2001). Otherwise
3215 // we made adjustments to place the 2-digit year in the proper
3216 // century, for parsed strings from "00" to "99". Any other string
3217 // is treated literally: "2250", "-1", "1", "002".
3218 if (fDateOverride.compare(hebr)==0 && value < 1000) {
3219 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3220 } else if (text.moveIndex32(start, 2) == pos.getIndex() && !isChineseCalendar
3221 && u_isdigit(text.char32At(start))
3222 && u_isdigit(text.char32At(text.moveIndex32(start, 1))))
3223 {
3224 // only adjust year for patterns less than 3.
3225 if(count < 3) {
3226 // Assume for example that the defaultCenturyStart is 6/18/1903.
3227 // This means that two-digit years will be forced into the range
3228 // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02
3229 // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond
3230 // to 1904, 1905, etc. If the year is 03, then it is 2003 if the
3231 // other fields specify a date before 6/18, or 1903 if they specify a
3232 // date afterwards. As a result, 03 is an ambiguous year. All other
3233 // two-digit years are unambiguous.
3234 if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
3235 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3236 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3237 value += (fDefaultCenturyStartYear/100)*100 +
3238 (value < ambiguousTwoDigitYear ? 100 : 0);
3239 }
3240 }
3241 }
3242 cal.set(UCAL_YEAR, value);
3243
3244 // Delayed checking for adjustment of Hebrew month numbers in non-leap years.
3245 if (saveHebrewMonth >= 0) {
3246 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3247 if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) {
3248 cal.set(UCAL_MONTH,saveHebrewMonth);
3249 } else {
3250 cal.set(UCAL_MONTH,saveHebrewMonth-1);
3251 }
3252 saveHebrewMonth = -1;
3253 }
3254 return pos.getIndex();
3255
3256 case UDAT_YEAR_WOY_FIELD:
3257 // Comment is the same as for UDAT_Year_FIELDs - look above
3258 if (fDateOverride.compare(hebr)==0 && value < 1000) {
3259 value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR;
3260 } else if (text.moveIndex32(start, 2) == pos.getIndex()
3261 && u_isdigit(text.char32At(start))
3262 && u_isdigit(text.char32At(text.moveIndex32(start, 1)))
3263 && fHaveDefaultCentury )
3264 {
3265 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
3266 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
3267 value += (fDefaultCenturyStartYear/100)*100 +
3268 (value < ambiguousTwoDigitYear ? 100 : 0);
3269 }
3270 cal.set(UCAL_YEAR_WOY, value);
3271 return pos.getIndex();
3272
3273 case UDAT_YEAR_NAME_FIELD:
3274 if (fSymbols->fShortYearNames != nullptr) {
3275 int32_t newStart = matchString(text, start, UCAL_YEAR, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount, nullptr, cal);
3276 if (newStart > 0) {
3277 return newStart;
3278 }
3279 }
3280 if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > fSymbols->fShortYearNamesCount)) {
3281 cal.set(UCAL_YEAR, value);
3282 return pos.getIndex();
3283 }
3284 return -start;
3285
3286 case UDAT_MONTH_FIELD:
3287 case UDAT_STANDALONE_MONTH_FIELD:
3288 if (gotNumber) // i.e., M or MM.
3289 {
3290 // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether
3291 // or not it was a leap year. We may or may not yet know what year it is, so might have to delay checking until
3292 // the year is parsed.
3293 if (!strcmp(cal.getType(),"hebrew")) {
3294 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3295 if (cal.isSet(UCAL_YEAR)) {
3296 UErrorCode monthStatus = U_ZERO_ERROR;
3297 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && value >= 6) {
3298 cal.set(UCAL_MONTH, value);
3299 } else {
3300 cal.set(UCAL_MONTH, value - 1);
3301 }
3302 } else {
3303 saveHebrewMonth = value;
3304 }
3305 } else {
3306 // Don't want to parse the month if it is a string
3307 // while pattern uses numeric style: M/MM, L/LL
3308 // [We computed 'value' above.]
3309 cal.set(UCAL_MONTH, value - 1);
3310 }
3311 return pos.getIndex();
3312 } else {
3313 // count >= 3 // i.e., MMM/MMMM, LLL/LLLL
3314 // Want to be able to parse both short and long forms.
3315 // Try count == 4 first:
3316 UnicodeString * wideMonthPat = nullptr;
3317 UnicodeString * shortMonthPat = nullptr;
3318 if (fSymbols->fLeapMonthPatterns != nullptr && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) {
3319 if (patternCharIndex==UDAT_MONTH_FIELD) {
3320 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide];
3321 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev];
3322 } else {
3323 wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide];
3324 shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev];
3325 }
3326 }
3327 int32_t newStart = 0;
3328 if (patternCharIndex==UDAT_MONTH_FIELD) {
3329 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3330 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fMonthsCount==fSymbols->fShortMonthsCount) {
3331 // single function to check both wide and short, an experiment
3332 newStart = matchAlphaMonthStrings(text, start, fSymbols->fMonths, fSymbols->fShortMonths, fSymbols->fMonthsCount, cal); // try MMMM,MMM
3333 if (newStart > 0) {
3334 return newStart;
3335 }
3336 }
3337 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3338 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal); // try MMMM
3339 if (newStart > 0) {
3340 return newStart;
3341 }
3342 }
3343 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3344 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal); // try MMM
3345 }
3346 } else {
3347 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) && count>=3 && count <=4 &&
3348 fSymbols->fLeapMonthPatterns==nullptr && fSymbols->fStandaloneMonthsCount==fSymbols->fStandaloneShortMonthsCount) {
3349 // single function to check both wide and short, an experiment
3350 newStart = matchAlphaMonthStrings(text, start, fSymbols->fStandaloneMonths, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneMonthsCount, cal); // try MMMM,MMM
3351 if (newStart > 0) {
3352 return newStart;
3353 }
3354 }
3355 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3356 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal); // try LLLL
3357 if (newStart > 0) {
3358 return newStart;
3359 }
3360 }
3361 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3362 newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL
3363 }
3364 }
3365 if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) // currently we do not try to parse MMMMM/LLLLL: #8860
3366 return newStart;
3367 // else we allowing parsing as number, below
3368 }
3369 break;
3370
3371 case UDAT_HOUR_OF_DAY1_FIELD:
3372 // [We computed 'value' above.]
3373 if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
3374 value = 0;
3375
3376 // fall through to set field
3377 U_FALLTHROUGH;
3378 case UDAT_HOUR_OF_DAY0_FIELD:
3379 cal.set(UCAL_HOUR_OF_DAY, value);
3380 return pos.getIndex();
3381
3382 case UDAT_FRACTIONAL_SECOND_FIELD:
3383 // Fractional seconds left-justify
3384 i = countDigits(text, start, pos.getIndex());
3385 if (i < 3) {
3386 while (i < 3) {
3387 value *= 10;
3388 i++;
3389 }
3390 } else {
3391 int32_t a = 1;
3392 while (i > 3) {
3393 a *= 10;
3394 i--;
3395 }
3396 value /= a;
3397 }
3398 cal.set(UCAL_MILLISECOND, value);
3399 return pos.getIndex();
3400
3401 case UDAT_DOW_LOCAL_FIELD:
3402 if (gotNumber) // i.e., e or ee
3403 {
3404 // [We computed 'value' above.]
3405 cal.set(UCAL_DOW_LOCAL, value);
3406 return pos.getIndex();
3407 }
3408 // else for eee-eeeee fall through to handling of EEE-EEEEE
3409 // fall through, do not break here
3410 U_FALLTHROUGH;
3411 case UDAT_DAY_OF_WEEK_FIELD:
3412 {
3413 // Want to be able to parse both short and long forms.
3414 // Try count == 4 (EEEE) wide first:
3415 int32_t newStart = 0;
3416 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3417 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3418 fSymbols->fWeekdays, fSymbols->fWeekdaysCount, nullptr, cal)) > 0)
3419 return newStart;
3420 }
3421 // EEEE wide failed, now try EEE abbreviated
3422 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3423 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3424 fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, nullptr, cal)) > 0)
3425 return newStart;
3426 }
3427 // EEE abbreviated failed, now try EEEEEE short
3428 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3429 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3430 fSymbols->fShorterWeekdays, fSymbols->fShorterWeekdaysCount, nullptr, cal)) > 0)
3431 return newStart;
3432 }
3433 // EEEEEE short failed, now try EEEEE narrow
3434 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3435 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3436 fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, nullptr, cal)) > 0)
3437 return newStart;
3438 }
3439 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD)
3440 return newStart;
3441 // else we allowing parsing as number, below
3442 }
3443 break;
3444
3445 case UDAT_STANDALONE_DAY_FIELD:
3446 {
3447 if (gotNumber) // c or cc
3448 {
3449 // [We computed 'value' above.]
3450 cal.set(UCAL_DOW_LOCAL, value);
3451 return pos.getIndex();
3452 }
3453 // Want to be able to parse both short and long forms.
3454 // Try count == 4 (cccc) first:
3455 int32_t newStart = 0;
3456 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3457 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3458 fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, nullptr, cal)) > 0)
3459 return newStart;
3460 }
3461 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3462 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3463 fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, nullptr, cal)) > 0)
3464 return newStart;
3465 }
3466 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) {
3467 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
3468 fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, nullptr, cal)) > 0)
3469 return newStart;
3470 }
3471 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3472 return newStart;
3473 // else we allowing parsing as number, below
3474 }
3475 break;
3476
3477 case UDAT_AM_PM_FIELD:
3478 {
3479 // optionally try both wide/abbrev and narrow forms
3480 int32_t newStart = 0;
3481 // try wide/abbrev
3482 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count < 5 ) {
3483 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, nullptr, cal)) > 0) {
3484 return newStart;
3485 }
3486 }
3487 // try narrow
3488 if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count >= 5 ) {
3489 if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fNarrowAmPms, fSymbols->fNarrowAmPmsCount, nullptr, cal)) > 0) {
3490 return newStart;
3491 }
3492 }
3493 // no matches for given options
3494 return -start;
3495 }
3496
3497 case UDAT_HOUR1_FIELD:
3498 // [We computed 'value' above.]
3499 if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
3500 value = 0;
3501
3502 // fall through to set field
3503 U_FALLTHROUGH;
3504 case UDAT_HOUR0_FIELD:
3505 cal.set(UCAL_HOUR, value);
3506 return pos.getIndex();
3507
3508 case UDAT_QUARTER_FIELD:
3509 if (gotNumber) // i.e., Q or QQ.
3510 {
3511 // Don't want to parse the month if it is a string
3512 // while pattern uses numeric style: Q or QQ.
3513 // [We computed 'value' above.]
3514 cal.set(UCAL_MONTH, (value - 1) * 3);
3515 return pos.getIndex();
3516 } else {
3517 // count >= 3 // i.e., QQQ or QQQQ
3518 // Want to be able to parse short, long, and narrow forms.
3519 // Try count == 4 first:
3520 int32_t newStart = 0;
3521
3522 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3523 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3524 fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
3525 return newStart;
3526 }
3527 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3528 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3529 fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0)
3530 return newStart;
3531 }
3532 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3533 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3534 fSymbols->fNarrowQuarters, fSymbols->fNarrowQuartersCount, cal)) > 0)
3535 return newStart;
3536 }
3537 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3538 return newStart;
3539 // else we allowing parsing as number, below
3540 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3541 return -start;
3542 }
3543 break;
3544
3545 case UDAT_STANDALONE_QUARTER_FIELD:
3546 if (gotNumber) // i.e., q or qq.
3547 {
3548 // Don't want to parse the month if it is a string
3549 // while pattern uses numeric style: q or q.
3550 // [We computed 'value' above.]
3551 cal.set(UCAL_MONTH, (value - 1) * 3);
3552 return pos.getIndex();
3553 } else {
3554 // count >= 3 // i.e., qqq or qqqq
3555 // Want to be able to parse both short and long forms.
3556 // Try count == 4 first:
3557 int32_t newStart = 0;
3558
3559 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3560 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3561 fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
3562 return newStart;
3563 }
3564 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3565 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3566 fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0)
3567 return newStart;
3568 }
3569 if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3570 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
3571 fSymbols->fStandaloneNarrowQuarters, fSymbols->fStandaloneNarrowQuartersCount, cal)) > 0)
3572 return newStart;
3573 }
3574 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status))
3575 return newStart;
3576 // else we allowing parsing as number, below
3577 if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status))
3578 return -start;
3579 }
3580 break;
3581
3582 case UDAT_TIMEZONE_FIELD: // 'z'
3583 {
3584 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_SPECIFIC_SHORT : UTZFMT_STYLE_SPECIFIC_LONG;
3585 const TimeZoneFormat *tzfmt = tzFormat(status);
3586 if (U_SUCCESS(status)) {
3587 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3588 if (tz != nullptr) {
3589 cal.adoptTimeZone(tz);
3590 return pos.getIndex();
3591 }
3592 }
3593 return -start;
3594 }
3595 break;
3596 case UDAT_TIMEZONE_RFC_FIELD: // 'Z'
3597 {
3598 UTimeZoneFormatStyle style = (count < 4) ?
3599 UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL : ((count == 5) ? UTZFMT_STYLE_ISO_EXTENDED_FULL: UTZFMT_STYLE_LOCALIZED_GMT);
3600 const TimeZoneFormat *tzfmt = tzFormat(status);
3601 if (U_SUCCESS(status)) {
3602 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3603 if (tz != nullptr) {
3604 cal.adoptTimeZone(tz);
3605 return pos.getIndex();
3606 }
3607 }
3608 return -start;
3609 }
3610 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
3611 {
3612 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_GENERIC_SHORT : UTZFMT_STYLE_GENERIC_LONG;
3613 const TimeZoneFormat *tzfmt = tzFormat(status);
3614 if (U_SUCCESS(status)) {
3615 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3616 if (tz != nullptr) {
3617 cal.adoptTimeZone(tz);
3618 return pos.getIndex();
3619 }
3620 }
3621 return -start;
3622 }
3623 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
3624 {
3625 UTimeZoneFormatStyle style;
3626 switch (count) {
3627 case 1:
3628 style = UTZFMT_STYLE_ZONE_ID_SHORT;
3629 break;
3630 case 2:
3631 style = UTZFMT_STYLE_ZONE_ID;
3632 break;
3633 case 3:
3634 style = UTZFMT_STYLE_EXEMPLAR_LOCATION;
3635 break;
3636 default:
3637 style = UTZFMT_STYLE_GENERIC_LOCATION;
3638 break;
3639 }
3640 const TimeZoneFormat *tzfmt = tzFormat(status);
3641 if (U_SUCCESS(status)) {
3642 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3643 if (tz != nullptr) {
3644 cal.adoptTimeZone(tz);
3645 return pos.getIndex();
3646 }
3647 }
3648 return -start;
3649 }
3650 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O'
3651 {
3652 UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_LOCALIZED_GMT_SHORT : UTZFMT_STYLE_LOCALIZED_GMT;
3653 const TimeZoneFormat *tzfmt = tzFormat(status);
3654 if (U_SUCCESS(status)) {
3655 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3656 if (tz != nullptr) {
3657 cal.adoptTimeZone(tz);
3658 return pos.getIndex();
3659 }
3660 }
3661 return -start;
3662 }
3663 case UDAT_TIMEZONE_ISO_FIELD: // 'X'
3664 {
3665 UTimeZoneFormatStyle style;
3666 switch (count) {
3667 case 1:
3668 style = UTZFMT_STYLE_ISO_BASIC_SHORT;
3669 break;
3670 case 2:
3671 style = UTZFMT_STYLE_ISO_BASIC_FIXED;
3672 break;
3673 case 3:
3674 style = UTZFMT_STYLE_ISO_EXTENDED_FIXED;
3675 break;
3676 case 4:
3677 style = UTZFMT_STYLE_ISO_BASIC_FULL;
3678 break;
3679 default:
3680 style = UTZFMT_STYLE_ISO_EXTENDED_FULL;
3681 break;
3682 }
3683 const TimeZoneFormat *tzfmt = tzFormat(status);
3684 if (U_SUCCESS(status)) {
3685 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3686 if (tz != nullptr) {
3687 cal.adoptTimeZone(tz);
3688 return pos.getIndex();
3689 }
3690 }
3691 return -start;
3692 }
3693 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x'
3694 {
3695 UTimeZoneFormatStyle style;
3696 switch (count) {
3697 case 1:
3698 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT;
3699 break;
3700 case 2:
3701 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED;
3702 break;
3703 case 3:
3704 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED;
3705 break;
3706 case 4:
3707 style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL;
3708 break;
3709 default:
3710 style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL;
3711 break;
3712 }
3713 const TimeZoneFormat *tzfmt = tzFormat(status);
3714 if (U_SUCCESS(status)) {
3715 TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType);
3716 if (tz != nullptr) {
3717 cal.adoptTimeZone(tz);
3718 return pos.getIndex();
3719 }
3720 }
3721 return -start;
3722 }
3723 // currently no pattern character is defined for UDAT_TIME_SEPARATOR_FIELD
3724 // so we should not get here. Leave support in for future definition.
3725 case UDAT_TIME_SEPARATOR_FIELD:
3726 {
3727 static const char16_t def_sep = DateFormatSymbols::DEFAULT_TIME_SEPARATOR;
3728 static const char16_t alt_sep = DateFormatSymbols::ALTERNATE_TIME_SEPARATOR;
3729
3730 // Try matching a time separator.
3731 int32_t count_sep = 1;
3732 UnicodeString data[3];
3733 fSymbols->getTimeSeparatorString(data[0]);
3734
3735 // Add the default, if different from the locale.
3736 if (data[0].compare(&def_sep, 1) != 0) {
3737 data[count_sep++].setTo(def_sep);
3738 }
3739
3740 // If lenient, add also the alternate, if different from the locale.
3741 if (isLenient() && data[0].compare(&alt_sep, 1) != 0) {
3742 data[count_sep++].setTo(alt_sep);
3743 }
3744
3745 return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count_sep, nullptr, cal);
3746 }
3747
3748 case UDAT_AM_PM_MIDNIGHT_NOON_FIELD:
3749 {
3750 U_ASSERT(dayPeriod != nullptr);
3751 int32_t ampmStart = subParse(text, start, 0x61, count,
3752 obeyCount, allowNegative, ambiguousYear, saveHebrewMonth, cal,
3753 patLoc, numericLeapMonthFormatter, tzTimeType);
3754
3755 if (ampmStart > 0) {
3756 return ampmStart;
3757 } else {
3758 int32_t newStart = 0;
3759
3760 // Only match the first two strings from the day period strings array.
3761 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3762 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3763 2, *dayPeriod)) > 0) {
3764 return newStart;
3765 }
3766 }
3767 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3768 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3769 2, *dayPeriod)) > 0) {
3770 return newStart;
3771 }
3772 }
3773 // count == 4, but allow other counts
3774 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) {
3775 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3776 2, *dayPeriod)) > 0) {
3777 return newStart;
3778 }
3779 }
3780
3781 return -start;
3782 }
3783 }
3784
3785 case UDAT_FLEXIBLE_DAY_PERIOD_FIELD:
3786 {
3787 U_ASSERT(dayPeriod != nullptr);
3788 int32_t newStart = 0;
3789
3790 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) {
3791 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods,
3792 fSymbols->fAbbreviatedDayPeriodsCount, *dayPeriod)) > 0) {
3793 return newStart;
3794 }
3795 }
3796 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) {
3797 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods,
3798 fSymbols->fNarrowDayPeriodsCount, *dayPeriod)) > 0) {
3799 return newStart;
3800 }
3801 }
3802 if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) {
3803 if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods,
3804 fSymbols->fWideDayPeriodsCount, *dayPeriod)) > 0) {
3805 return newStart;
3806 }
3807 }
3808
3809 return -start;
3810 }
3811
3812 default:
3813 // Handle "generic" fields
3814 // this is now handled below, outside the switch block
3815 break;
3816 }
3817 // Handle "generic" fields:
3818 // switch default case now handled here (outside switch block) to allow
3819 // parsing of some string fields as digits for lenient case
3820
3821 int32_t parseStart = pos.getIndex();
3822 const UnicodeString* src;
3823 if (obeyCount) {
3824 if ((start+count) > text.length()) {
3825 return -start;
3826 }
3827 text.extractBetween(0, start + count, temp);
3828 src = &temp;
3829 } else {
3830 src = &text;
3831 }
3832 parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3833 if (obeyCount && !isLenient() && pos.getIndex() < start + count) {
3834 return -start;
3835 }
3836 if (pos.getIndex() != parseStart) {
3837 int32_t val = number.getLong();
3838
3839 // Don't need suffix processing here (as in number processing at the beginning of the function);
3840 // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes.
3841
3842 if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) {
3843 // Check the range of the value
3844 int32_t bias = gFieldRangeBias[patternCharIndex];
3845 if (bias >= 0 && (val > cal.getMaximum(field) + bias || val < cal.getMinimum(field) + bias)) {
3846 return -start;
3847 }
3848 }
3849
3850 // For the following, need to repeat some of the "if (gotNumber)" code above:
3851 // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD,
3852 // UDAT_[STANDALONE_]QUARTER_FIELD
3853 switch (patternCharIndex) {
3854 case UDAT_MONTH_FIELD:
3855 // See notes under UDAT_MONTH_FIELD case above
3856 if (!strcmp(cal.getType(),"hebrew")) {
3857 HebrewCalendar *hc = (HebrewCalendar*)&cal;
3858 if (cal.isSet(UCAL_YEAR)) {
3859 UErrorCode monthStatus = U_ZERO_ERROR;
3860 if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && val >= 6) {
3861 cal.set(UCAL_MONTH, val);
3862 } else {
3863 cal.set(UCAL_MONTH, val - 1);
3864 }
3865 } else {
3866 saveHebrewMonth = val;
3867 }
3868 } else {
3869 cal.set(UCAL_MONTH, val - 1);
3870 }
3871 break;
3872 case UDAT_STANDALONE_MONTH_FIELD:
3873 cal.set(UCAL_MONTH, val - 1);
3874 break;
3875 case UDAT_DOW_LOCAL_FIELD:
3876 case UDAT_STANDALONE_DAY_FIELD:
3877 cal.set(UCAL_DOW_LOCAL, val);
3878 break;
3879 case UDAT_QUARTER_FIELD:
3880 case UDAT_STANDALONE_QUARTER_FIELD:
3881 cal.set(UCAL_MONTH, (val - 1) * 3);
3882 break;
3883 case UDAT_RELATED_YEAR_FIELD:
3884 cal.setRelatedYear(val);
3885 break;
3886 default:
3887 cal.set(field, val);
3888 break;
3889 }
3890 return pos.getIndex();
3891 }
3892 return -start;
3893 }
3894
/**
 * Parse an integer from text using the supplied number format, with no
 * limit on the number of digits.
 *
 * Convenience overload: forwards to the maxDigits overload with a maxDigits
 * of -1. That overload only truncates when maxDigits > 0, so -1 means
 * "no limit".
 *
 * @param text          the text being parsed
 * @param number        receives the parsed value
 * @param pos           parse position; passed through to the maxDigits overload
 * @param allowNegative passed through unchanged to the maxDigits overload
 * @param fmt           the number format to parse with
 */
void SimpleDateFormat::parseInt(const UnicodeString& text,
                                Formattable& number,
                                ParsePosition& pos,
                                UBool allowNegative,
                                const NumberFormat *fmt) const {
    parseInt(text, number, -1, pos, allowNegative,fmt);
}
3906
3907 /**
3908 * Parse an integer using fNumberFormat up to maxDigits.
3909 */
parseInt(const UnicodeString & text,Formattable & number,int32_t maxDigits,ParsePosition & pos,UBool allowNegative,const NumberFormat * fmt) const3910 void SimpleDateFormat::parseInt(const UnicodeString& text,
3911 Formattable& number,
3912 int32_t maxDigits,
3913 ParsePosition& pos,
3914 UBool allowNegative,
3915 const NumberFormat *fmt) const {
3916 UnicodeString oldPrefix;
3917 const auto* fmtAsDF = dynamic_cast<const DecimalFormat*>(fmt);
3918 LocalPointer<DecimalFormat> df;
3919 if (!allowNegative && fmtAsDF != nullptr) {
3920 df.adoptInstead(fmtAsDF->clone());
3921 if (df.isNull()) {
3922 // Memory allocation error
3923 return;
3924 }
3925 df->setNegativePrefix(UnicodeString(true, SUPPRESS_NEGATIVE_PREFIX, -1));
3926 fmt = df.getAlias();
3927 }
3928 int32_t oldPos = pos.getIndex();
3929 fmt->parse(text, number, pos);
3930
3931 if (maxDigits > 0) {
3932 // adjust the result to fit into
3933 // the maxDigits and move the position back
3934 int32_t nDigits = pos.getIndex() - oldPos;
3935 if (nDigits > maxDigits) {
3936 int32_t val = number.getLong();
3937 nDigits -= maxDigits;
3938 while (nDigits > 0) {
3939 val /= 10;
3940 nDigits--;
3941 }
3942 pos.setIndex(oldPos + maxDigits);
3943 number.setLong(val);
3944 }
3945 }
3946 }
3947
countDigits(const UnicodeString & text,int32_t start,int32_t end) const3948 int32_t SimpleDateFormat::countDigits(const UnicodeString& text, int32_t start, int32_t end) const {
3949 int32_t numDigits = 0;
3950 int32_t idx = start;
3951 while (idx < end) {
3952 UChar32 cp = text.char32At(idx);
3953 if (u_isdigit(cp)) {
3954 numDigits++;
3955 }
3956 idx += U16_LENGTH(cp);
3957 }
3958 return numDigits;
3959 }
3960
3961 //----------------------------------------------------------------------
3962
translatePattern(const UnicodeString & originalPattern,UnicodeString & translatedPattern,const UnicodeString & from,const UnicodeString & to,UErrorCode & status)3963 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
3964 UnicodeString& translatedPattern,
3965 const UnicodeString& from,
3966 const UnicodeString& to,
3967 UErrorCode& status)
3968 {
3969 // run through the pattern and convert any pattern symbols from the version
3970 // in "from" to the corresponding character in "to". This code takes
3971 // quoted strings into account (it doesn't try to translate them), and it signals
3972 // an error if a particular "pattern character" doesn't appear in "from".
3973 // Depending on the values of "from" and "to" this can convert from generic
3974 // to localized patterns or localized to generic.
3975 if (U_FAILURE(status)) {
3976 return;
3977 }
3978
3979 translatedPattern.remove();
3980 UBool inQuote = false;
3981 for (int32_t i = 0; i < originalPattern.length(); ++i) {
3982 char16_t c = originalPattern[i];
3983 if (inQuote) {
3984 if (c == QUOTE) {
3985 inQuote = false;
3986 }
3987 } else {
3988 if (c == QUOTE) {
3989 inQuote = true;
3990 } else if (isSyntaxChar(c)) {
3991 int32_t ci = from.indexOf(c);
3992 if (ci == -1) {
3993 status = U_INVALID_FORMAT_ERROR;
3994 return;
3995 }
3996 c = to[ci];
3997 }
3998 }
3999 translatedPattern += c;
4000 }
4001 if (inQuote) {
4002 status = U_INVALID_FORMAT_ERROR;
4003 return;
4004 }
4005 }
4006
4007 //----------------------------------------------------------------------
4008
4009 UnicodeString&
toPattern(UnicodeString & result) const4010 SimpleDateFormat::toPattern(UnicodeString& result) const
4011 {
4012 result = fPattern;
4013 return result;
4014 }
4015
4016 //----------------------------------------------------------------------
4017
4018 UnicodeString&
toLocalizedPattern(UnicodeString & result,UErrorCode & status) const4019 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
4020 UErrorCode& status) const
4021 {
4022 translatePattern(fPattern, result,
4023 UnicodeString(DateFormatSymbols::getPatternUChars()),
4024 fSymbols->fLocalPatternChars, status);
4025 return result;
4026 }
4027
4028 //----------------------------------------------------------------------
4029
/**
 * Replace this formatter's pattern and refresh the state derived from it.
 *
 * The new pattern is stored in fPattern and parsePattern() is run on it.
 * For the Japanese calendar in the "ja" language, the "y=jpanyear" number
 * format override is then switched off or on according to fHasHanYearChar.
 * NOTE(review): parsePattern() presumably recomputes fHasHanYearChar from
 * the new pattern - confirm against its definition.
 *
 * @param pattern the new (non-localized) date/time pattern
 */
void
SimpleDateFormat::applyPattern(const UnicodeString& pattern)
{
    fPattern = pattern;
    parsePattern();

    // Hack to update use of Gannen year numbering for ja@calendar=japanese -
    // use only if format is non-numeric (includes 年) and no other fDateOverride.
    if (fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 &&
            uprv_strcmp(fLocale.getLanguage(),"ja") == 0) {
        if (fDateOverride==UnicodeString(u"y=jpanyear") && !fHasHanYearChar) {
            // Gannen numbering is set but new pattern should not use it, unset;
            // use procedure from adoptNumberFormat to clear overrides
            if (fSharedNumberFormatters) {
                freeSharedNumberFormatters(fSharedNumberFormatters);
                fSharedNumberFormatters = nullptr;
            }
            fDateOverride.setToBogus(); // record status
        } else if (fDateOverride.isBogus() && fHasHanYearChar) {
            // No current override (=> no Gannen numbering) but new pattern needs it;
            // use procedures from initNumberFormatters / adoptNumberFormat
            // The formatter table is allocated under LOCK, and only if it
            // does not exist yet.
            umtx_lock(&LOCK);
            if (fSharedNumberFormatters == nullptr) {
                fSharedNumberFormatters = allocSharedNumberFormatters();
            }
            umtx_unlock(&LOCK);
            // Skip silently if the table could not be allocated; this method
            // has no way to report an error.
            if (fSharedNumberFormatters != nullptr) {
                Locale ovrLoc(fLocale.getLanguage(),fLocale.getCountry(),fLocale.getVariant(),"numbers=jpanyear");
                UErrorCode status = U_ZERO_ERROR;
                const SharedNumberFormat *snf = createSharedNumberFormat(ovrLoc, status);
                if (U_SUCCESS(status)) {
                    // Now that we have an appropriate number formatter, fill in the
                    // appropriate slot in the number formatters table.
                    UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(u'y');
                    SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]);
                    snf->deleteIfZeroRefCount();
                    fDateOverride.setTo(u"y=jpanyear", -1); // record status
                }
            }
        }
    }
}
4072
4073 //----------------------------------------------------------------------
4074
4075 void
applyLocalizedPattern(const UnicodeString & pattern,UErrorCode & status)4076 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
4077 UErrorCode &status)
4078 {
4079 translatePattern(pattern, fPattern,
4080 fSymbols->fLocalPatternChars,
4081 UnicodeString(DateFormatSymbols::getPatternUChars()), status);
4082 }
4083
4084 //----------------------------------------------------------------------
4085
/**
 * Return the date format symbols currently held by this formatter.
 *
 * @return the fSymbols pointer itself (not a copy); the object stays owned
 *         by this formatter and is deleted when the symbols are replaced via
 *         adoptDateFormatSymbols() or setDateFormatSymbols()
 */
const DateFormatSymbols*
SimpleDateFormat::getDateFormatSymbols() const
{
    return fSymbols;
}
4091
4092 //----------------------------------------------------------------------
4093
4094 void
adoptDateFormatSymbols(DateFormatSymbols * newFormatSymbols)4095 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
4096 {
4097 delete fSymbols;
4098 fSymbols = newFormatSymbols;
4099 }
4100
4101 //----------------------------------------------------------------------
4102 void
setDateFormatSymbols(const DateFormatSymbols & newFormatSymbols)4103 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
4104 {
4105 delete fSymbols;
4106 fSymbols = new DateFormatSymbols(newFormatSymbols);
4107 }
4108
4109 //----------------------------------------------------------------------
4110 const TimeZoneFormat*
getTimeZoneFormat() const4111 SimpleDateFormat::getTimeZoneFormat() const {
4112 // TimeZoneFormat initialization might fail when out of memory.
4113 // If we always initialize TimeZoneFormat instance, we can return
4114 // such status there. For now, this implementation lazily instantiates
4115 // a TimeZoneFormat for performance optimization reasons, but cannot
4116 // propagate such error (probably just out of memory case) to the caller.
4117 UErrorCode status = U_ZERO_ERROR;
4118 return (const TimeZoneFormat*)tzFormat(status);
4119 }
4120
4121 //----------------------------------------------------------------------
4122 void
adoptTimeZoneFormat(TimeZoneFormat * timeZoneFormatToAdopt)4123 SimpleDateFormat::adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt)
4124 {
4125 delete fTimeZoneFormat;
4126 fTimeZoneFormat = timeZoneFormatToAdopt;
4127 }
4128
4129 //----------------------------------------------------------------------
4130 void
setTimeZoneFormat(const TimeZoneFormat & newTimeZoneFormat)4131 SimpleDateFormat::setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat)
4132 {
4133 delete fTimeZoneFormat;
4134 fTimeZoneFormat = new TimeZoneFormat(newTimeZoneFormat);
4135 }
4136
4137 //----------------------------------------------------------------------
4138
4139
adoptCalendar(Calendar * calendarToAdopt)4140 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
4141 {
4142 UErrorCode status = U_ZERO_ERROR;
4143 Locale calLocale(fLocale);
4144 calLocale.setKeywordValue("calendar", calendarToAdopt->getType(), status);
4145 DateFormatSymbols *newSymbols =
4146 DateFormatSymbols::createForLocale(calLocale, status);
4147 if (U_FAILURE(status)) {
4148 delete calendarToAdopt;
4149 return;
4150 }
4151 DateFormat::adoptCalendar(calendarToAdopt);
4152 delete fSymbols;
4153 fSymbols = newSymbols;
4154 initializeDefaultCentury(); // we need a new century (possibly)
4155 }
4156
4157
4158 //----------------------------------------------------------------------
4159
4160
4161 // override the DateFormat implementation in order to
4162 // lazily initialize fCapitalizationBrkIter
4163 void
setContext(UDisplayContext value,UErrorCode & status)4164 SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status)
4165 {
4166 DateFormat::setContext(value, status);
4167 #if !UCONFIG_NO_BREAK_ITERATION
4168 if (U_SUCCESS(status)) {
4169 if ( fCapitalizationBrkIter == nullptr && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
4170 value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE) ) {
4171 status = U_ZERO_ERROR;
4172 fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status);
4173 if (U_FAILURE(status)) {
4174 delete fCapitalizationBrkIter;
4175 fCapitalizationBrkIter = nullptr;
4176 }
4177 }
4178 }
4179 #endif
4180 }
4181
4182
4183 //----------------------------------------------------------------------
4184
4185
/**
 * Report whether the given calendar field is ignored by this formatter's
 * own pattern.
 *
 * Convenience overload: forwards to the static two-argument overload with
 * fPattern as the pattern.
 *
 * @param field the calendar field to test
 * @return the result of isFieldUnitIgnored(fPattern, field)
 */
UBool
SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
    return isFieldUnitIgnored(fPattern, field);
}
4190
4191
4192 UBool
isFieldUnitIgnored(const UnicodeString & pattern,UCalendarDateFields field)4193 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
4194 UCalendarDateFields field) {
4195 int32_t fieldLevel = fgCalendarFieldToLevel[field];
4196 int32_t level;
4197 char16_t ch;
4198 UBool inQuote = false;
4199 char16_t prevCh = 0;
4200 int32_t count = 0;
4201
4202 for (int32_t i = 0; i < pattern.length(); ++i) {
4203 ch = pattern[i];
4204 if (ch != prevCh && count > 0) {
4205 level = getLevelFromChar(prevCh);
4206 // the larger the level, the smaller the field unit.
4207 if (fieldLevel <= level) {
4208 return false;
4209 }
4210 count = 0;
4211 }
4212 if (ch == QUOTE) {
4213 if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
4214 ++i;
4215 } else {
4216 inQuote = ! inQuote;
4217 }
4218 }
4219 else if (!inQuote && isSyntaxChar(ch)) {
4220 prevCh = ch;
4221 ++count;
4222 }
4223 }
4224 if (count > 0) {
4225 // last item
4226 level = getLevelFromChar(prevCh);
4227 if (fieldLevel <= level) {
4228 return false;
4229 }
4230 }
4231 return true;
4232 }
4233
4234 //----------------------------------------------------------------------
4235
/**
 * Return the locale stored in this formatter.
 *
 * @return a reference to the fLocale member (not a copy)
 */
const Locale&
SimpleDateFormat::getSmpFmtLocale() const {
    return fLocale;
}
4240
4241 //----------------------------------------------------------------------
4242
4243 int32_t
checkIntSuffix(const UnicodeString & text,int32_t start,int32_t patLoc,UBool isNegative) const4244 SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start,
4245 int32_t patLoc, UBool isNegative) const {
4246 // local variables
4247 UnicodeString suf;
4248 int32_t patternMatch;
4249 int32_t textPreMatch;
4250 int32_t textPostMatch;
4251
4252 // check that we are still in range
4253 if ( (start > text.length()) ||
4254 (start < 0) ||
4255 (patLoc < 0) ||
4256 (patLoc > fPattern.length())) {
4257 // out of range, don't advance location in text
4258 return start;
4259 }
4260
4261 // get the suffix
4262 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
4263 if (decfmt != nullptr) {
4264 if (isNegative) {
4265 suf = decfmt->getNegativeSuffix(suf);
4266 }
4267 else {
4268 suf = decfmt->getPositiveSuffix(suf);
4269 }
4270 }
4271
4272 // check for suffix
4273 if (suf.length() <= 0) {
4274 return start;
4275 }
4276
4277 // check suffix will be encountered in the pattern
4278 patternMatch = compareSimpleAffix(suf,fPattern,patLoc);
4279
4280 // check if a suffix will be encountered in the text
4281 textPreMatch = compareSimpleAffix(suf,text,start);
4282
4283 // check if a suffix was encountered in the text
4284 textPostMatch = compareSimpleAffix(suf,text,start-suf.length());
4285
4286 // check for suffix match
4287 if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) {
4288 return start;
4289 }
4290 else if ((textPostMatch >= 0) && (patternMatch >= 0) && (textPostMatch == patternMatch)) {
4291 return start - suf.length();
4292 }
4293
4294 // should not get here
4295 return start;
4296 }
4297
4298 //----------------------------------------------------------------------
4299
/**
 * Match an affix string against input starting at pos.
 *
 * Non-white-space characters of the affix must appear literally in the
 * input. A run of Pattern_White_Space in the affix is first matched
 * literally as far as possible, then any remaining white space is skipped
 * on both sides.
 *
 * @param affix the affix (for example a number suffix) to look for
 * @param input the text to match against
 * @param pos   the index in input at which matching starts
 * @return the number of code units of input consumed by the match, or -1
 *         if the affix does not match at pos
 */
int32_t
SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
                   const UnicodeString& input,
                   int32_t pos) const {
    int32_t start = pos;
    // i is advanced inside the loop body; each pass handles either one
    // literal code point or one whole run of white space.
    for (int32_t i=0; i<affix.length(); ) {
        UChar32 c = affix.char32At(i);
        int32_t len = U16_LENGTH(c);
        if (PatternProps::isWhiteSpace(c)) {
            // We may have a pattern like: \u200F \u0020
            //        and input text like: \u200F \u0020
            // Note that U+200F and U+0020 are Pattern_White_Space but only
            // U+0020 is UWhiteSpace.  So we have to first do a direct
            // match of the run of Pattern_White_Space in the pattern,
            // then match any extra characters.
            UBool literalMatch = false;
            while (pos < input.length() &&
                   input.char32At(pos) == c) {
                literalMatch = true;
                i += len;
                pos += len;
                if (i == affix.length()) {
                    break;
                }
                c = affix.char32At(i);
                len = U16_LENGTH(c);
                if (!PatternProps::isWhiteSpace(c)) {
                    break;
                }
            }

            // Advance over run in pattern
            i = skipPatternWhiteSpace(affix, i);

            // Advance over run in input text
            // Must see at least one white space char in input,
            // unless we've already matched some characters literally.
            int32_t s = pos;
            pos = skipUWhiteSpace(input, pos);
            if (pos == s && !literalMatch) {
                return -1;
            }

            // If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
            // Otherwise, the previous lines may have skipped over text (such as U+00A0) that
            // is also in the affix.
            i = skipUWhiteSpace(affix, i);
        } else {
            // Not white space: the input must contain exactly this code point.
            if (pos < input.length() &&
                input.char32At(pos) == c) {
                i += len;
                pos += len;
            } else {
                return -1;
            }
        }
    }
    // Whole affix matched; report how much of the input it covered.
    return pos - start;
}
4359
4360 //----------------------------------------------------------------------
4361
4362 int32_t
skipPatternWhiteSpace(const UnicodeString & text,int32_t pos) const4363 SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const {
4364 const char16_t* s = text.getBuffer();
4365 return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s);
4366 }
4367
4368 //----------------------------------------------------------------------
4369
4370 int32_t
skipUWhiteSpace(const UnicodeString & text,int32_t pos) const4371 SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const {
4372 while (pos < text.length()) {
4373 UChar32 c = text.char32At(pos);
4374 if (!u_isUWhiteSpace(c)) {
4375 break;
4376 }
4377 pos += U16_LENGTH(c);
4378 }
4379 return pos;
4380 }
4381
4382 //----------------------------------------------------------------------
4383
4384 // Lazy TimeZoneFormat instantiation, semantically const.
4385 TimeZoneFormat *
tzFormat(UErrorCode & status) const4386 SimpleDateFormat::tzFormat(UErrorCode &status) const {
4387 Mutex m(&LOCK);
4388 if (fTimeZoneFormat == nullptr && U_SUCCESS(status)) {
4389 const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat =
4390 TimeZoneFormat::createInstance(fLocale, status);
4391 }
4392 return fTimeZoneFormat;
4393 }
4394
parsePattern()4395 void SimpleDateFormat::parsePattern() {
4396 fHasMinute = false;
4397 fHasSecond = false;
4398 fHasHanYearChar = false;
4399
4400 int len = fPattern.length();
4401 UBool inQuote = false;
4402 for (int32_t i = 0; i < len; ++i) {
4403 char16_t ch = fPattern[i];
4404 if (ch == QUOTE) {
4405 inQuote = !inQuote;
4406 }
4407 if (ch == 0x5E74) { // don't care whether this is inside quotes
4408 fHasHanYearChar = true;
4409 }
4410 if (!inQuote) {
4411 if (ch == 0x6D) { // 0x6D == 'm'
4412 fHasMinute = true;
4413 }
4414 if (ch == 0x73) { // 0x73 == 's'
4415 fHasSecond = true;
4416 }
4417 }
4418 }
4419 }
4420
4421 U_NAMESPACE_END
4422
4423 #endif /* #if !UCONFIG_NO_FORMATTING */
4424
4425 //eof
4426