• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2007-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File DTPTNGEN.CPP
10 *
11 *******************************************************************************
12 */
13 
14 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_FORMATTING
16 
17 #include "unicode/datefmt.h"
18 #include "unicode/decimfmt.h"
19 #include "unicode/dtfmtsym.h"
20 #include "unicode/dtptngen.h"
21 #include "unicode/localpointer.h"
22 #include "unicode/simpleformatter.h"
23 #include "unicode/smpdtfmt.h"
24 #include "unicode/udat.h"
25 #include "unicode/udatpg.h"
26 #include "unicode/uniset.h"
27 #include "unicode/uloc.h"
28 #include "unicode/ures.h"
29 #include "unicode/ustring.h"
30 #include "unicode/rep.h"
31 #include "unicode/region.h"
32 #include "cpputils.h"
33 #include "mutex.h"
34 #include "umutex.h"
35 #include "cmemory.h"
36 #include "cstring.h"
37 #include "locbased.h"
38 #include "hash.h"
39 #include "uhash.h"
40 #include "uresimp.h"
41 #include "dtptngen_impl.h"
42 #include "ucln_in.h"
43 #include "charstr.h"
44 #include "uassert.h"
45 
46 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
47 /**
48  * If we are on EBCDIC, use an iterator which will
49  * traverse the bundles in ASCII order.
50  */
51 #define U_USE_ASCII_BUNDLE_ITERATOR
52 #define U_SORT_ASCII_BUNDLE_ITERATOR
53 #endif
54 
55 #if defined(U_USE_ASCII_BUNDLE_ITERATOR)
56 
57 #include "unicode/ustring.h"
58 #include "uarrsort.h"
59 
60 struct UResAEntry {
61     UChar *key;
62     UResourceBundle *item;
63 };
64 
65 struct UResourceBundleAIterator {
66     UResourceBundle  *bund;
67     UResAEntry *entries;
68     int32_t num;
69     int32_t cursor;
70 };
71 
72 /* Must be C linkage to pass function pointer to the sort function */
73 
74 U_CDECL_BEGIN
75 
76 static int32_t U_CALLCONV
ures_a_codepointSort(const void * context,const void * left,const void * right)77 ures_a_codepointSort(const void *context, const void *left, const void *right) {
78     //CompareContext *cmp=(CompareContext *)context;
79     return u_strcmp(((const UResAEntry *)left)->key,
80                     ((const UResAEntry *)right)->key);
81 }
82 
83 U_CDECL_END
84 
ures_a_open(UResourceBundleAIterator * aiter,UResourceBundle * bund,UErrorCode * status)85 static void ures_a_open(UResourceBundleAIterator *aiter, UResourceBundle *bund, UErrorCode *status) {
86     if(U_FAILURE(*status)) {
87         return;
88     }
89     aiter->bund = bund;
90     aiter->num = ures_getSize(aiter->bund);
91     aiter->cursor = 0;
92 #if !defined(U_SORT_ASCII_BUNDLE_ITERATOR)
93     aiter->entries = nullptr;
94 #else
95     aiter->entries = (UResAEntry*)uprv_malloc(sizeof(UResAEntry)*aiter->num);
96     for(int i=0;i<aiter->num;i++) {
97         aiter->entries[i].item = ures_getByIndex(aiter->bund, i, nullptr, status);
98         const char *akey = ures_getKey(aiter->entries[i].item);
99         int32_t len = uprv_strlen(akey)+1;
100         aiter->entries[i].key = (UChar*)uprv_malloc(len*sizeof(UChar));
101         u_charsToUChars(akey, aiter->entries[i].key, len);
102     }
103     uprv_sortArray(aiter->entries, aiter->num, sizeof(UResAEntry), ures_a_codepointSort, nullptr, true, status);
104 #endif
105 }
106 
ures_a_close(UResourceBundleAIterator * aiter)107 static void ures_a_close(UResourceBundleAIterator *aiter) {
108 #if defined(U_SORT_ASCII_BUNDLE_ITERATOR)
109     for(int i=0;i<aiter->num;i++) {
110         uprv_free(aiter->entries[i].key);
111         ures_close(aiter->entries[i].item);
112     }
113 #endif
114 }
115 
ures_a_getNextString(UResourceBundleAIterator * aiter,int32_t * len,const char ** key,UErrorCode * err)116 static const UChar *ures_a_getNextString(UResourceBundleAIterator *aiter, int32_t *len, const char **key, UErrorCode *err) {
117 #if !defined(U_SORT_ASCII_BUNDLE_ITERATOR)
118     return ures_getNextString(aiter->bund, len, key, err);
119 #else
120     if(U_FAILURE(*err)) return nullptr;
121     UResourceBundle *item = aiter->entries[aiter->cursor].item;
122     const UChar* ret = ures_getString(item, len, err);
123     *key = ures_getKey(item);
124     aiter->cursor++;
125     return ret;
126 #endif
127 }
128 
129 
130 #endif
131 
132 
133 U_NAMESPACE_BEGIN
134 
135 // *****************************************************************************
136 // class DateTimePatternGenerator
137 // *****************************************************************************
138 static const UChar Canonical_Items[] = {
139     // GyQMwWEDFdaHmsSv
140     CAP_G, LOW_Y, CAP_Q, CAP_M, LOW_W, CAP_W, CAP_E,
141     CAP_D, CAP_F, LOW_D, LOW_A, // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J
142     CAP_H, LOW_M, LOW_S, CAP_S, LOW_V, 0
143 };
144 
145 static const dtTypeElem dtTypes[] = {
146     // patternChar, field, type, minLen, weight
147     {CAP_G, UDATPG_ERA_FIELD, DT_SHORT, 1, 3,},
148     {CAP_G, UDATPG_ERA_FIELD, DT_LONG,  4, 0},
149     {CAP_G, UDATPG_ERA_FIELD, DT_NARROW, 5, 0},
150 
151     {LOW_Y, UDATPG_YEAR_FIELD, DT_NUMERIC, 1, 20},
152     {CAP_Y, UDATPG_YEAR_FIELD, DT_NUMERIC + DT_DELTA, 1, 20},
153     {LOW_U, UDATPG_YEAR_FIELD, DT_NUMERIC + 2*DT_DELTA, 1, 20},
154     {LOW_R, UDATPG_YEAR_FIELD, DT_NUMERIC + 3*DT_DELTA, 1, 20},
155     {CAP_U, UDATPG_YEAR_FIELD, DT_SHORT, 1, 3},
156     {CAP_U, UDATPG_YEAR_FIELD, DT_LONG, 4, 0},
157     {CAP_U, UDATPG_YEAR_FIELD, DT_NARROW, 5, 0},
158 
159     {CAP_Q, UDATPG_QUARTER_FIELD, DT_NUMERIC, 1, 2},
160     {CAP_Q, UDATPG_QUARTER_FIELD, DT_SHORT, 3, 0},
161     {CAP_Q, UDATPG_QUARTER_FIELD, DT_LONG, 4, 0},
162     {CAP_Q, UDATPG_QUARTER_FIELD, DT_NARROW, 5, 0},
163     {LOW_Q, UDATPG_QUARTER_FIELD, DT_NUMERIC + DT_DELTA, 1, 2},
164     {LOW_Q, UDATPG_QUARTER_FIELD, DT_SHORT - DT_DELTA, 3, 0},
165     {LOW_Q, UDATPG_QUARTER_FIELD, DT_LONG - DT_DELTA, 4, 0},
166     {LOW_Q, UDATPG_QUARTER_FIELD, DT_NARROW - DT_DELTA, 5, 0},
167 
168     {CAP_M, UDATPG_MONTH_FIELD, DT_NUMERIC, 1, 2},
169     {CAP_M, UDATPG_MONTH_FIELD, DT_SHORT, 3, 0},
170     {CAP_M, UDATPG_MONTH_FIELD, DT_LONG, 4, 0},
171     {CAP_M, UDATPG_MONTH_FIELD, DT_NARROW, 5, 0},
172     {CAP_L, UDATPG_MONTH_FIELD, DT_NUMERIC + DT_DELTA, 1, 2},
173     {CAP_L, UDATPG_MONTH_FIELD, DT_SHORT - DT_DELTA, 3, 0},
174     {CAP_L, UDATPG_MONTH_FIELD, DT_LONG - DT_DELTA, 4, 0},
175     {CAP_L, UDATPG_MONTH_FIELD, DT_NARROW - DT_DELTA, 5, 0},
176     {LOW_L, UDATPG_MONTH_FIELD, DT_NUMERIC + DT_DELTA, 1, 1},
177 
178     {LOW_W, UDATPG_WEEK_OF_YEAR_FIELD, DT_NUMERIC, 1, 2},
179 
180     {CAP_W, UDATPG_WEEK_OF_MONTH_FIELD, DT_NUMERIC, 1, 0},
181 
182     {CAP_E, UDATPG_WEEKDAY_FIELD, DT_SHORT, 1, 3},
183     {CAP_E, UDATPG_WEEKDAY_FIELD, DT_LONG, 4, 0},
184     {CAP_E, UDATPG_WEEKDAY_FIELD, DT_NARROW, 5, 0},
185     {CAP_E, UDATPG_WEEKDAY_FIELD, DT_SHORTER, 6, 0},
186     {LOW_C, UDATPG_WEEKDAY_FIELD, DT_NUMERIC + 2*DT_DELTA, 1, 2},
187     {LOW_C, UDATPG_WEEKDAY_FIELD, DT_SHORT - 2*DT_DELTA, 3, 0},
188     {LOW_C, UDATPG_WEEKDAY_FIELD, DT_LONG - 2*DT_DELTA, 4, 0},
189     {LOW_C, UDATPG_WEEKDAY_FIELD, DT_NARROW - 2*DT_DELTA, 5, 0},
190     {LOW_C, UDATPG_WEEKDAY_FIELD, DT_SHORTER - 2*DT_DELTA, 6, 0},
191     {LOW_E, UDATPG_WEEKDAY_FIELD, DT_NUMERIC + DT_DELTA, 1, 2}, // LOW_E is currently not used in CLDR data, should not be canonical
192     {LOW_E, UDATPG_WEEKDAY_FIELD, DT_SHORT - DT_DELTA, 3, 0},
193     {LOW_E, UDATPG_WEEKDAY_FIELD, DT_LONG - DT_DELTA, 4, 0},
194     {LOW_E, UDATPG_WEEKDAY_FIELD, DT_NARROW - DT_DELTA, 5, 0},
195     {LOW_E, UDATPG_WEEKDAY_FIELD, DT_SHORTER - DT_DELTA, 6, 0},
196 
197     {LOW_D, UDATPG_DAY_FIELD, DT_NUMERIC, 1, 2},
198     {LOW_G, UDATPG_DAY_FIELD, DT_NUMERIC + DT_DELTA, 1, 20}, // really internal use, so we don't care
199 
200     {CAP_D, UDATPG_DAY_OF_YEAR_FIELD, DT_NUMERIC, 1, 3},
201 
202     {CAP_F, UDATPG_DAY_OF_WEEK_IN_MONTH_FIELD, DT_NUMERIC, 1, 0},
203 
204     {LOW_A, UDATPG_DAYPERIOD_FIELD, DT_SHORT, 1, 3},
205     {LOW_A, UDATPG_DAYPERIOD_FIELD, DT_LONG, 4, 0},
206     {LOW_A, UDATPG_DAYPERIOD_FIELD, DT_NARROW, 5, 0},
207     {LOW_B, UDATPG_DAYPERIOD_FIELD, DT_SHORT - DT_DELTA, 1, 3},
208     {LOW_B, UDATPG_DAYPERIOD_FIELD, DT_LONG - DT_DELTA, 4, 0},
209     {LOW_B, UDATPG_DAYPERIOD_FIELD, DT_NARROW - DT_DELTA, 5, 0},
210     // b needs to be closer to a than to B, so we make this 3*DT_DELTA
211     {CAP_B, UDATPG_DAYPERIOD_FIELD, DT_SHORT - 3*DT_DELTA, 1, 3},
212     {CAP_B, UDATPG_DAYPERIOD_FIELD, DT_LONG - 3*DT_DELTA, 4, 0},
213     {CAP_B, UDATPG_DAYPERIOD_FIELD, DT_NARROW - 3*DT_DELTA, 5, 0},
214 
215     {CAP_H, UDATPG_HOUR_FIELD, DT_NUMERIC + 10*DT_DELTA, 1, 2}, // 24 hour
216     {LOW_K, UDATPG_HOUR_FIELD, DT_NUMERIC + 11*DT_DELTA, 1, 2}, // 24 hour
217     {LOW_H, UDATPG_HOUR_FIELD, DT_NUMERIC, 1, 2}, // 12 hour
218     {CAP_K, UDATPG_HOUR_FIELD, DT_NUMERIC + DT_DELTA, 1, 2}, // 12 hour
219     // The C code has had versions of the following 3, keep & update. Should not need these, but...
220     // Without these, certain tests using e.g. staticGetSkeleton fail because j/J in patterns
221     // get skipped instead of mapped to the right hour chars, for example in
222     //   DateFormatTest::TestPatternFromSkeleton
223     //   IntlTestDateTimePatternGeneratorAPI:: testStaticGetSkeleton
224     //   DateIntervalFormatTest::testTicket11985
225     // Need to investigate better handling of jJC replacement e.g. in staticGetSkeleton.
226     {CAP_J, UDATPG_HOUR_FIELD, DT_NUMERIC + 5*DT_DELTA, 1, 2}, // 12/24 hour no AM/PM
227     {LOW_J, UDATPG_HOUR_FIELD, DT_NUMERIC + 6*DT_DELTA, 1, 6}, // 12/24 hour
228     {CAP_C, UDATPG_HOUR_FIELD, DT_NUMERIC + 7*DT_DELTA, 1, 6}, // 12/24 hour with preferred dayPeriods for 12
229 
230     {LOW_M, UDATPG_MINUTE_FIELD, DT_NUMERIC, 1, 2},
231 
232     {LOW_S, UDATPG_SECOND_FIELD, DT_NUMERIC, 1, 2},
233     {CAP_A, UDATPG_SECOND_FIELD, DT_NUMERIC + DT_DELTA, 1, 1000},
234 
235     {CAP_S, UDATPG_FRACTIONAL_SECOND_FIELD, DT_NUMERIC, 1, 1000},
236 
237     {LOW_V, UDATPG_ZONE_FIELD, DT_SHORT - 2*DT_DELTA, 1, 0},
238     {LOW_V, UDATPG_ZONE_FIELD, DT_LONG - 2*DT_DELTA, 4, 0},
239     {LOW_Z, UDATPG_ZONE_FIELD, DT_SHORT, 1, 3},
240     {LOW_Z, UDATPG_ZONE_FIELD, DT_LONG, 4, 0},
241     {CAP_Z, UDATPG_ZONE_FIELD, DT_NARROW - DT_DELTA, 1, 3},
242     {CAP_Z, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0},
243     {CAP_Z, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 5, 0},
244     {CAP_O, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 1, 0},
245     {CAP_O, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0},
246     {CAP_V, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 1, 0},
247     {CAP_V, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 2, 0},
248     {CAP_V, UDATPG_ZONE_FIELD, DT_LONG-1 - DT_DELTA, 3, 0},
249     {CAP_V, UDATPG_ZONE_FIELD, DT_LONG-2 - DT_DELTA, 4, 0},
250     {CAP_X, UDATPG_ZONE_FIELD, DT_NARROW - DT_DELTA, 1, 0},
251     {CAP_X, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 2, 0},
252     {CAP_X, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0},
253     {LOW_X, UDATPG_ZONE_FIELD, DT_NARROW - DT_DELTA, 1, 0},
254     {LOW_X, UDATPG_ZONE_FIELD, DT_SHORT - DT_DELTA, 2, 0},
255     {LOW_X, UDATPG_ZONE_FIELD, DT_LONG - DT_DELTA, 4, 0},
256 
257     {0, UDATPG_FIELD_COUNT, 0, 0, 0} , // last row of dtTypes[]
258  };
259 
260 static const char* const CLDR_FIELD_APPEND[] = {
261     "Era", "Year", "Quarter", "Month", "Week", "*", "Day-Of-Week",
262     "*", "*", "Day", "*", // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J
263     "Hour", "Minute", "Second", "*", "Timezone"
264 };
265 
266 static const char* const CLDR_FIELD_NAME[UDATPG_FIELD_COUNT] = {
267     "era", "year", "quarter", "month", "week", "weekOfMonth", "weekday",
268     "dayOfYear", "weekdayOfMonth", "day", "dayperiod", // The UDATPG_x_FIELD constants and these fields have a different order than in ICU4J
269     "hour", "minute", "second", "*", "zone"
270 };
271 
272 static const char* const CLDR_FIELD_WIDTH[] = { // [UDATPG_WIDTH_COUNT]
273     "", "-short", "-narrow"
274 };
275 
276 static constexpr UDateTimePGDisplayWidth UDATPG_WIDTH_APPENDITEM = UDATPG_WIDE;
277 static constexpr int32_t UDATPG_FIELD_KEY_MAX = 24; // max length of CLDR field tag (type + width)
278 
279 // For appendItems
280 static const UChar UDATPG_ItemFormat[]= {0x7B, 0x30, 0x7D, 0x20, 0x251C, 0x7B, 0x32, 0x7D, 0x3A,
281     0x20, 0x7B, 0x31, 0x7D, 0x2524, 0};  // {0} \u251C{2}: {1}\u2524
282 
283 //static const UChar repeatedPatterns[6]={CAP_G, CAP_E, LOW_Z, LOW_V, CAP_Q, 0}; // "GEzvQ"
284 
// Resource bundle tag names used by this file.
static const char DT_DateTimePatternsTag[]         = "DateTimePatterns";
static const char DT_DateAtTimePatternsTag[]       = "DateTimePatterns%atTime";
static const char DT_DateTimeCalendarTag[]         = "calendar";
static const char DT_DateTimeGregorianTag[]        = "gregorian";
static const char DT_DateTimeAppendItemsTag[]      = "appendItems";
static const char DT_DateTimeFieldsTag[]           = "fields";
static const char DT_DateTimeAvailableFormatsTag[] = "availableFormats";
292 //static const UnicodeString repeatedPattern=UnicodeString(repeatedPatterns);
293 
294 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateTimePatternGenerator)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DTSkeletonEnumeration)295 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DTSkeletonEnumeration)
296 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DTRedundantEnumeration)
297 
298 DateTimePatternGenerator*  U_EXPORT2
299 DateTimePatternGenerator::createInstance(UErrorCode& status) {
300     return createInstance(Locale::getDefault(), status);
301 }
302 
303 DateTimePatternGenerator* U_EXPORT2
createInstance(const Locale & locale,UErrorCode & status)304 DateTimePatternGenerator::createInstance(const Locale& locale, UErrorCode& status) {
305     if (U_FAILURE(status)) {
306         return nullptr;
307     }
308     LocalPointer<DateTimePatternGenerator> result(
309             new DateTimePatternGenerator(locale, status), status);
310     return U_SUCCESS(status) ? result.orphan() : nullptr;
311 }
312 
313 DateTimePatternGenerator* U_EXPORT2
createInstanceNoStdPat(const Locale & locale,UErrorCode & status)314 DateTimePatternGenerator::createInstanceNoStdPat(const Locale& locale, UErrorCode& status) {
315     if (U_FAILURE(status)) {
316         return nullptr;
317     }
318     LocalPointer<DateTimePatternGenerator> result(
319             new DateTimePatternGenerator(locale, status, true), status);
320     return U_SUCCESS(status) ? result.orphan() : nullptr;
321 }
322 
323 DateTimePatternGenerator*  U_EXPORT2
createEmptyInstance(UErrorCode & status)324 DateTimePatternGenerator::createEmptyInstance(UErrorCode& status) {
325     if (U_FAILURE(status)) {
326         return nullptr;
327     }
328     LocalPointer<DateTimePatternGenerator> result(
329             new DateTimePatternGenerator(status), status);
330     return U_SUCCESS(status) ? result.orphan() : nullptr;
331 }
332 
DateTimePatternGenerator(UErrorCode & status)333 DateTimePatternGenerator::DateTimePatternGenerator(UErrorCode &status) :
334     skipMatcher(nullptr),
335     fAvailableFormatKeyHash(nullptr),
336     fDefaultHourFormatChar(0),
337     internalErrorCode(U_ZERO_ERROR)
338 {
339     fp = new FormatParser();
340     dtMatcher = new DateTimeMatcher();
341     distanceInfo = new DistanceInfo();
342     patternMap = new PatternMap();
343     if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) {
344         internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR;
345     }
346 }
347 
DateTimePatternGenerator(const Locale & locale,UErrorCode & status,UBool skipStdPatterns)348 DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) :
349     skipMatcher(nullptr),
350     fAvailableFormatKeyHash(nullptr),
351     fDefaultHourFormatChar(0),
352     internalErrorCode(U_ZERO_ERROR)
353 {
354     fp = new FormatParser();
355     dtMatcher = new DateTimeMatcher();
356     distanceInfo = new DistanceInfo();
357     patternMap = new PatternMap();
358     if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) {
359         internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR;
360     }
361     else {
362         initData(locale, status, skipStdPatterns);
363     }
364 }
365 
/**
 * Copy constructor: allocates its own helper objects and then copies the
 * state of `other` through operator=.
 *
 * If any helper allocation fails, internalErrorCode is set to
 * U_MEMORY_ALLOCATION_ERROR and nothing is copied: operator= dereferences the
 * helper objects and would also overwrite the recorded error.
 */
DateTimePatternGenerator::DateTimePatternGenerator(const DateTimePatternGenerator& other) :
    UObject(),
    skipMatcher(nullptr),
    fAvailableFormatKeyHash(nullptr),
    fDefaultHourFormatChar(0),
    internalErrorCode(U_ZERO_ERROR)
{
    fp = new FormatParser();
    dtMatcher = new DateTimeMatcher();
    distanceInfo = new DistanceInfo();
    patternMap = new PatternMap();
    if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) {
        internalErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return;  // do not run operator= on missing helper objects
    }
    *this=other;
}
382 
383 DateTimePatternGenerator&
operator =(const DateTimePatternGenerator & other)384 DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) {
385     // reflexive case
386     if (&other == this) {
387         return *this;
388     }
389     internalErrorCode = other.internalErrorCode;
390     pLocale = other.pLocale;
391     fDefaultHourFormatChar = other.fDefaultHourFormatChar;
392     *fp = *(other.fp);
393     dtMatcher->copyFrom(other.dtMatcher->skeleton);
394     *distanceInfo = *(other.distanceInfo);
395     for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) {
396         dateTimeFormat[style] = other.dateTimeFormat[style];
397     }
398     decimal = other.decimal;
399     for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) {
400         dateTimeFormat[style].getTerminatedBuffer(); // NUL-terminate for the C API.
401     }
402     decimal.getTerminatedBuffer();
403     delete skipMatcher;
404     if ( other.skipMatcher == nullptr ) {
405         skipMatcher = nullptr;
406     }
407     else {
408         skipMatcher = new DateTimeMatcher(*other.skipMatcher);
409         if (skipMatcher == nullptr)
410         {
411             internalErrorCode = U_MEMORY_ALLOCATION_ERROR;
412             return *this;
413         }
414     }
415     for (int32_t i=0; i< UDATPG_FIELD_COUNT; ++i ) {
416         appendItemFormats[i] = other.appendItemFormats[i];
417         appendItemFormats[i].getTerminatedBuffer(); // NUL-terminate for the C API.
418         for (int32_t j=0; j< UDATPG_WIDTH_COUNT; ++j ) {
419             fieldDisplayNames[i][j] = other.fieldDisplayNames[i][j];
420             fieldDisplayNames[i][j].getTerminatedBuffer(); // NUL-terminate for the C API.
421         }
422     }
423     patternMap->copyFrom(*other.patternMap, internalErrorCode);
424     copyHashtable(other.fAvailableFormatKeyHash, internalErrorCode);
425     return *this;
426 }
427 
428 
429 bool
operator ==(const DateTimePatternGenerator & other) const430 DateTimePatternGenerator::operator==(const DateTimePatternGenerator& other) const {
431     if (this == &other) {
432         return true;
433     }
434     if ((pLocale==other.pLocale) && (patternMap->equals(*other.patternMap)) &&
435         (decimal==other.decimal)) {
436         for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) {
437             if (dateTimeFormat[style] != other.dateTimeFormat[style]) {
438                 return false;
439             }
440         }
441         for ( int32_t i=0 ; i<UDATPG_FIELD_COUNT; ++i ) {
442             if (appendItemFormats[i] != other.appendItemFormats[i]) {
443                 return false;
444             }
445             for (int32_t j=0; j< UDATPG_WIDTH_COUNT; ++j ) {
446                 if (fieldDisplayNames[i][j] != other.fieldDisplayNames[i][j]) {
447                     return false;
448                 }
449             }
450         }
451         return true;
452     }
453     else {
454         return false;
455     }
456 }
457 
458 bool
operator !=(const DateTimePatternGenerator & other) const459 DateTimePatternGenerator::operator!=(const DateTimePatternGenerator& other) const {
460     return  !operator==(other);
461 }
462 
~DateTimePatternGenerator()463 DateTimePatternGenerator::~DateTimePatternGenerator() {
464     if (fAvailableFormatKeyHash!=nullptr) {
465         delete fAvailableFormatKeyHash;
466     }
467 
468     if (fp != nullptr) delete fp;
469     if (dtMatcher != nullptr) delete dtMatcher;
470     if (distanceInfo != nullptr) delete distanceInfo;
471     if (patternMap != nullptr) delete patternMap;
472     if (skipMatcher != nullptr) delete skipMatcher;
473 }
474 
475 namespace {
476 
477 UInitOnce initOnce {};
478 UHashtable *localeToAllowedHourFormatsMap = nullptr;
479 
480 // Value deleter for hashmap.
deleteAllowedHourFormats(void * ptr)481 U_CFUNC void U_CALLCONV deleteAllowedHourFormats(void *ptr) {
482     uprv_free(ptr);
483 }
484 
485 // Close hashmap at cleanup.
allowedHourFormatsCleanup()486 U_CFUNC UBool U_CALLCONV allowedHourFormatsCleanup() {
487     uhash_close(localeToAllowedHourFormatsMap);
488     return true;
489 }
490 
491 enum AllowedHourFormat{
492     ALLOWED_HOUR_FORMAT_UNKNOWN = -1,
493     ALLOWED_HOUR_FORMAT_h,
494     ALLOWED_HOUR_FORMAT_H,
495     ALLOWED_HOUR_FORMAT_K,  // Added ICU-20383, used by JP
496     ALLOWED_HOUR_FORMAT_k,  // Added ICU-20383, not currently used
497     ALLOWED_HOUR_FORMAT_hb,
498     ALLOWED_HOUR_FORMAT_hB,
499     ALLOWED_HOUR_FORMAT_Kb, // Added ICU-20383, not currently used
500     ALLOWED_HOUR_FORMAT_KB, // Added ICU-20383, not currently used
501     // ICU-20383 The following are unlikely and not currently used
502     ALLOWED_HOUR_FORMAT_Hb,
503     ALLOWED_HOUR_FORMAT_HB
504 };
505 
506 }  // namespace
507 
508 void
initData(const Locale & locale,UErrorCode & status,UBool skipStdPatterns)509 DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status, UBool skipStdPatterns) {
510     //const char *baseLangName = locale.getBaseName(); // unused
511 
512     skipMatcher = nullptr;
513     fAvailableFormatKeyHash=nullptr;
514     addCanonicalItems(status);
515     if (!skipStdPatterns) { // skip to prevent circular dependency when called from SimpleDateFormat::construct
516         addICUPatterns(locale, status);
517     }
518     addCLDRData(locale, status);
519     setDateTimeFromCalendar(locale, status);
520     setDecimalSymbols(locale, status);
521     umtx_initOnce(initOnce, loadAllowedHourFormatsData, status);
522     getAllowedHourFormats(locale, status);
523     // If any of the above methods failed then the object is in an invalid state.
524     internalErrorCode = status;
525 } // DateTimePatternGenerator::initData
526 
527 namespace {
528 
529 struct AllowedHourFormatsSink : public ResourceSink {
530     // Initialize sub-sinks.
AllowedHourFormatsSink__anonfb78e61e0211::AllowedHourFormatsSink531     AllowedHourFormatsSink() {}
532     virtual ~AllowedHourFormatsSink();
533 
put__anonfb78e61e0211::AllowedHourFormatsSink534     virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
535                      UErrorCode &errorCode) override {
536         ResourceTable timeData = value.getTable(errorCode);
537         if (U_FAILURE(errorCode)) { return; }
538         for (int32_t i = 0; timeData.getKeyAndValue(i, key, value); ++i) {
539             const char *regionOrLocale = key;
540             ResourceTable formatList = value.getTable(errorCode);
541             if (U_FAILURE(errorCode)) { return; }
542             // below we construct a list[] that has an entry for the "preferred" value at [0],
543             // followed by 1 or more entries for the "allowed" values, terminated with an
544             // entry for ALLOWED_HOUR_FORMAT_UNKNOWN (not included in length below)
545             LocalMemory<int32_t> list;
546             int32_t length = 0;
547             int32_t preferredFormat = ALLOWED_HOUR_FORMAT_UNKNOWN;
548             for (int32_t j = 0; formatList.getKeyAndValue(j, key, value); ++j) {
549                 if (uprv_strcmp(key, "allowed") == 0) {
550                     if (value.getType() == URES_STRING) {
551                         length = 2; // 1 preferred to add later, 1 allowed to add now
552                         if (list.allocateInsteadAndReset(length + 1) == nullptr) {
553                             errorCode = U_MEMORY_ALLOCATION_ERROR;
554                             return;
555                         }
556                         list[1] = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode));
557                     }
558                     else {
559                         ResourceArray allowedFormats = value.getArray(errorCode);
560                         length = allowedFormats.getSize() + 1; // 1 preferred, getSize allowed
561                         if (list.allocateInsteadAndReset(length + 1) == nullptr) {
562                             errorCode = U_MEMORY_ALLOCATION_ERROR;
563                             return;
564                         }
565                         for (int32_t k = 1; k < length; ++k) {
566                             allowedFormats.getValue(k-1, value);
567                             list[k] = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode));
568                         }
569                     }
570                 } else if (uprv_strcmp(key, "preferred") == 0) {
571                     preferredFormat = getHourFormatFromUnicodeString(value.getUnicodeString(errorCode));
572                 }
573             }
574             if (length > 1) {
575                 list[0] = (preferredFormat!=ALLOWED_HOUR_FORMAT_UNKNOWN)? preferredFormat: list[1];
576             } else {
577                 // fallback handling for missing data
578                 length = 2; // 1 preferred, 1 allowed
579                 if (list.allocateInsteadAndReset(length + 1) == nullptr) {
580                     errorCode = U_MEMORY_ALLOCATION_ERROR;
581                     return;
582                 }
583                 list[0] = (preferredFormat!=ALLOWED_HOUR_FORMAT_UNKNOWN)? preferredFormat: ALLOWED_HOUR_FORMAT_H;
584                 list[1] = list[0];
585             }
586             list[length] = ALLOWED_HOUR_FORMAT_UNKNOWN;
587             // At this point list[] will have at least two non-ALLOWED_HOUR_FORMAT_UNKNOWN entries,
588             // followed by ALLOWED_HOUR_FORMAT_UNKNOWN.
589             uhash_put(localeToAllowedHourFormatsMap, const_cast<char *>(regionOrLocale), list.orphan(), &errorCode);
590             if (U_FAILURE(errorCode)) { return; }
591         }
592     }
593 
getHourFormatFromUnicodeString__anonfb78e61e0211::AllowedHourFormatsSink594     AllowedHourFormat getHourFormatFromUnicodeString(const UnicodeString &s) {
595         if (s.length() == 1) {
596             if (s[0] == LOW_H) { return ALLOWED_HOUR_FORMAT_h; }
597             if (s[0] == CAP_H) { return ALLOWED_HOUR_FORMAT_H; }
598             if (s[0] == CAP_K) { return ALLOWED_HOUR_FORMAT_K; }
599             if (s[0] == LOW_K) { return ALLOWED_HOUR_FORMAT_k; }
600         } else if (s.length() == 2) {
601             if (s[0] == LOW_H && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_hb; }
602             if (s[0] == LOW_H && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_hB; }
603             if (s[0] == CAP_K && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_Kb; }
604             if (s[0] == CAP_K && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_KB; }
605             if (s[0] == CAP_H && s[1] == LOW_B) { return ALLOWED_HOUR_FORMAT_Hb; }
606             if (s[0] == CAP_H && s[1] == CAP_B) { return ALLOWED_HOUR_FORMAT_HB; }
607         }
608 
609         return ALLOWED_HOUR_FORMAT_UNKNOWN;
610     }
611 };
612 
613 }  // namespace
614 
~AllowedHourFormatsSink()615 AllowedHourFormatsSink::~AllowedHourFormatsSink() {}
616 
loadAllowedHourFormatsData(UErrorCode & status)617 U_CFUNC void U_CALLCONV DateTimePatternGenerator::loadAllowedHourFormatsData(UErrorCode &status) {
618     if (U_FAILURE(status)) { return; }
619     localeToAllowedHourFormatsMap = uhash_open(
620         uhash_hashChars, uhash_compareChars, nullptr, &status);
621     if (U_FAILURE(status)) { return; }
622 
623     uhash_setValueDeleter(localeToAllowedHourFormatsMap, deleteAllowedHourFormats);
624     ucln_i18n_registerCleanup(UCLN_I18N_ALLOWED_HOUR_FORMATS, allowedHourFormatsCleanup);
625 
626     LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "supplementalData", &status));
627     if (U_FAILURE(status)) { return; }
628 
629     AllowedHourFormatsSink sink;
630     // TODO: Currently in the enumeration each table allocates a new array.
631     // Try to reduce the number of memory allocations. Consider storing a
632     // UVector32 with the concatenation of all of the sub-arrays, put the start index
633     // into the hashmap, store 6 single-value sub-arrays right at the beginning of the
634     // vector (at index enum*2) for easy data sharing, copy sub-arrays into runtime
635     // object. Remember to clean up the vector, too.
636     ures_getAllItemsWithFallback(rb.getAlias(), "timeData", sink, status);
637 }
638 
getAllowedHourFormatsLangCountry(const char * language,const char * country,UErrorCode & status)639 static int32_t* getAllowedHourFormatsLangCountry(const char* language, const char* country, UErrorCode& status) {
640     CharString langCountry;
641     langCountry.append(language, status);
642     langCountry.append('_', status);
643     langCountry.append(country, status);
644 
645     int32_t* allowedFormats;
646     allowedFormats = (int32_t *)uhash_get(localeToAllowedHourFormatsMap, langCountry.data());
647     if (allowedFormats == nullptr) {
648         allowedFormats = (int32_t *)uhash_get(localeToAllowedHourFormatsMap, const_cast<char *>(country));
649     }
650 
651     return allowedFormats;
652 }
653 
getAllowedHourFormats(const Locale & locale,UErrorCode & status)654 void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErrorCode &status) {
655     if (U_FAILURE(status)) { return; }
656 
657     const char *language = locale.getLanguage();
658     const char *country = locale.getCountry();
659 
660     char regionOverride[8];
661     int32_t regionOverrideLength = locale.getKeywordValue("rg", regionOverride, sizeof(regionOverride), status);
662     if (U_SUCCESS(status) && regionOverrideLength > 0) {
663         country = regionOverride;
664         if (regionOverrideLength > 2) {
665             // chop off any subdivision codes that may have been included
666             regionOverride[2] = '\0';
667         }
668     }
669 
670     Locale maxLocale;  // must be here for correct lifetime
671     if (*language == '\0' || *country == '\0') {
672         maxLocale = locale;
673         UErrorCode localStatus = U_ZERO_ERROR;
674         maxLocale.addLikelySubtags(localStatus);
675         if (U_SUCCESS(localStatus)) {
676             language = maxLocale.getLanguage();
677             country = maxLocale.getCountry();
678         }
679     }
680     if (*language == '\0') {
681         // Unexpected, but fail gracefully
682         language = "und";
683     }
684     if (*country == '\0') {
685         country = "001";
686     }
687 
688     int32_t* allowedFormats = getAllowedHourFormatsLangCountry(language, country, status);
689 
690     // We need to check if there is an hour cycle on locale
691     char buffer[8];
692     int32_t count = locale.getKeywordValue("hours", buffer, sizeof(buffer), status);
693 
694     fDefaultHourFormatChar = 0;
695     if (U_SUCCESS(status) && count > 0) {
696         if(uprv_strcmp(buffer, "h24") == 0) {
697             fDefaultHourFormatChar = LOW_K;
698         } else if(uprv_strcmp(buffer, "h23") == 0) {
699             fDefaultHourFormatChar = CAP_H;
700         } else if(uprv_strcmp(buffer, "h12") == 0) {
701             fDefaultHourFormatChar = LOW_H;
702         } else if(uprv_strcmp(buffer, "h11") == 0) {
703             fDefaultHourFormatChar = CAP_K;
704         }
705     }
706 
707     // Check if the region has an alias
708     if (allowedFormats == nullptr) {
709         UErrorCode localStatus = U_ZERO_ERROR;
710         const Region* region = Region::getInstance(country, localStatus);
711         if (U_SUCCESS(localStatus)) {
712             country = region->getRegionCode(); // the real region code
713             allowedFormats = getAllowedHourFormatsLangCountry(language, country, status);
714         }
715     }
716 
717     if (allowedFormats != nullptr) {  // Lookup is successful
718         // Here allowedFormats points to a list consisting of key for preferredFormat,
719         // followed by one or more keys for allowedFormats, then followed by ALLOWED_HOUR_FORMAT_UNKNOWN.
720         if (!fDefaultHourFormatChar) {
721             switch (allowedFormats[0]) {
722                 case ALLOWED_HOUR_FORMAT_h: fDefaultHourFormatChar = LOW_H; break;
723                 case ALLOWED_HOUR_FORMAT_H: fDefaultHourFormatChar = CAP_H; break;
724                 case ALLOWED_HOUR_FORMAT_K: fDefaultHourFormatChar = CAP_K; break;
725                 case ALLOWED_HOUR_FORMAT_k: fDefaultHourFormatChar = LOW_K; break;
726                 default: fDefaultHourFormatChar = CAP_H; break;
727             }
728         }
729 
730         for (int32_t i = 0; i < UPRV_LENGTHOF(fAllowedHourFormats); ++i) {
731             fAllowedHourFormats[i] = allowedFormats[i + 1];
732             if (fAllowedHourFormats[i] == ALLOWED_HOUR_FORMAT_UNKNOWN) {
733                 break;
734             }
735         }
736     } else {  // Lookup failed, twice
737         if (!fDefaultHourFormatChar) {
738             fDefaultHourFormatChar = CAP_H;
739         }
740         fAllowedHourFormats[0] = ALLOWED_HOUR_FORMAT_H;
741         fAllowedHourFormats[1] = ALLOWED_HOUR_FORMAT_UNKNOWN;
742     }
743 }
744 
745 UDateFormatHourCycle
getDefaultHourCycle(UErrorCode & status) const746 DateTimePatternGenerator::getDefaultHourCycle(UErrorCode& status) const {
747     if (U_FAILURE(status)) {
748         return UDAT_HOUR_CYCLE_23;
749     }
750     if (fDefaultHourFormatChar == 0) {
751         // We need to return something, but the caller should ignore it
752         // anyways since the returned status is a failure.
753         status = U_UNSUPPORTED_ERROR;
754         return UDAT_HOUR_CYCLE_23;
755     }
756     switch (fDefaultHourFormatChar) {
757         case CAP_K:
758             return UDAT_HOUR_CYCLE_11;
759         case LOW_H:
760             return UDAT_HOUR_CYCLE_12;
761         case CAP_H:
762             return UDAT_HOUR_CYCLE_23;
763         case LOW_K:
764             return UDAT_HOUR_CYCLE_24;
765         default:
766             UPRV_UNREACHABLE_EXIT;
767     }
768 }
769 
770 UnicodeString
getSkeleton(const UnicodeString & pattern,UErrorCode &)771 DateTimePatternGenerator::getSkeleton(const UnicodeString& pattern, UErrorCode&
772 /*status*/) {
773     FormatParser fp2;
774     DateTimeMatcher matcher;
775     PtnSkeleton localSkeleton;
776     matcher.set(pattern, &fp2, localSkeleton);
777     return localSkeleton.getSkeleton();
778 }
779 
780 UnicodeString
staticGetSkeleton(const UnicodeString & pattern,UErrorCode &)781 DateTimePatternGenerator::staticGetSkeleton(
782         const UnicodeString& pattern, UErrorCode& /*status*/) {
783     FormatParser fp;
784     DateTimeMatcher matcher;
785     PtnSkeleton localSkeleton;
786     matcher.set(pattern, &fp, localSkeleton);
787     return localSkeleton.getSkeleton();
788 }
789 
790 UnicodeString
getBaseSkeleton(const UnicodeString & pattern,UErrorCode &)791 DateTimePatternGenerator::getBaseSkeleton(const UnicodeString& pattern, UErrorCode& /*status*/) {
792     FormatParser fp2;
793     DateTimeMatcher matcher;
794     PtnSkeleton localSkeleton;
795     matcher.set(pattern, &fp2, localSkeleton);
796     return localSkeleton.getBaseSkeleton();
797 }
798 
799 UnicodeString
staticGetBaseSkeleton(const UnicodeString & pattern,UErrorCode &)800 DateTimePatternGenerator::staticGetBaseSkeleton(
801         const UnicodeString& pattern, UErrorCode& /*status*/) {
802     FormatParser fp;
803     DateTimeMatcher matcher;
804     PtnSkeleton localSkeleton;
805     matcher.set(pattern, &fp, localSkeleton);
806     return localSkeleton.getBaseSkeleton();
807 }
808 
809 void
addICUPatterns(const Locale & locale,UErrorCode & status)810 DateTimePatternGenerator::addICUPatterns(const Locale& locale, UErrorCode& status) {
811     if (U_FAILURE(status)) { return; }
812     UnicodeString dfPattern;
813     UnicodeString conflictingString;
814     DateFormat* df;
815 
816     // Load with ICU patterns
817     for (int32_t i=DateFormat::kFull; i<=DateFormat::kShort; i++) {
818         DateFormat::EStyle style = (DateFormat::EStyle)i;
819         df = DateFormat::createDateInstance(style, locale);
820         SimpleDateFormat* sdf;
821         if (df != nullptr && (sdf = dynamic_cast<SimpleDateFormat*>(df)) != nullptr) {
822             sdf->toPattern(dfPattern);
823             addPattern(dfPattern, false, conflictingString, status);
824         }
825         // TODO Maybe we should return an error when the date format isn't simple.
826         delete df;
827         if (U_FAILURE(status)) { return; }
828 
829         df = DateFormat::createTimeInstance(style, locale);
830         if (df != nullptr && (sdf = dynamic_cast<SimpleDateFormat*>(df)) != nullptr) {
831             sdf->toPattern(dfPattern);
832             addPattern(dfPattern, false, conflictingString, status);
833 
834             // TODO: C++ and Java are inconsistent (see #12568).
835             // C++ uses MEDIUM, but Java uses SHORT.
836             if ( i==DateFormat::kShort && !dfPattern.isEmpty() ) {
837                 consumeShortTimePattern(dfPattern, status);
838             }
839         }
840         // TODO Maybe we should return an error when the date format isn't simple.
841         delete df;
842         if (U_FAILURE(status)) { return; }
843     }
844 }
845 
846 void
hackTimes(const UnicodeString & hackPattern,UErrorCode & status)847 DateTimePatternGenerator::hackTimes(const UnicodeString& hackPattern, UErrorCode& status)  {
848     UnicodeString conflictingString;
849 
850     fp->set(hackPattern);
851     UnicodeString mmss;
852     UBool gotMm=false;
853     for (int32_t i=0; i<fp->itemNumber; ++i) {
854         UnicodeString field = fp->items[i];
855         if ( fp->isQuoteLiteral(field) ) {
856             if ( gotMm ) {
857                UnicodeString quoteLiteral;
858                fp->getQuoteLiteral(quoteLiteral, &i);
859                mmss += quoteLiteral;
860             }
861         }
862         else {
863             if (fp->isPatternSeparator(field) && gotMm) {
864                 mmss+=field;
865             }
866             else {
867                 UChar ch=field.charAt(0);
868                 if (ch==LOW_M) {
869                     gotMm=true;
870                     mmss+=field;
871                 }
872                 else {
873                     if (ch==LOW_S) {
874                         if (!gotMm) {
875                             break;
876                         }
877                         mmss+= field;
878                         addPattern(mmss, false, conflictingString, status);
879                         break;
880                     }
881                     else {
882                         if (gotMm || ch==LOW_Z || ch==CAP_Z || ch==LOW_V || ch==CAP_V) {
883                             break;
884                         }
885                     }
886                 }
887             }
888         }
889     }
890 }
891 
892 #define ULOC_LOCALE_IDENTIFIER_CAPACITY (ULOC_FULLNAME_CAPACITY + 1 + ULOC_KEYWORD_AND_VALUES_CAPACITY)
893 
894 void
getCalendarTypeToUse(const Locale & locale,CharString & destination,UErrorCode & err)895 DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& destination, UErrorCode& err) {
896     destination.clear().append(DT_DateTimeGregorianTag, -1, err); // initial default
897     if ( U_SUCCESS(err) ) {
898         UErrorCode localStatus = U_ZERO_ERROR;
899         char localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY];
900         // obtain a locale that always has the calendar key value that should be used
901         ures_getFunctionalEquivalent(
902             localeWithCalendarKey,
903             ULOC_LOCALE_IDENTIFIER_CAPACITY,
904             nullptr,
905             "calendar",
906             "calendar",
907             locale.getName(),
908             nullptr,
909             false,
910             &localStatus);
911         localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination
912         // now get the calendar key value from that locale
913         char calendarType[ULOC_KEYWORDS_CAPACITY];
914         int32_t calendarTypeLen = uloc_getKeywordValue(
915             localeWithCalendarKey,
916             "calendar",
917             calendarType,
918             ULOC_KEYWORDS_CAPACITY,
919             &localStatus);
920         // If the input locale was invalid, don't fail with missing resource error, instead
921         // continue with default of Gregorian.
922         if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
923             err = localStatus;
924             return;
925         }
926         if (calendarTypeLen > 0 && calendarTypeLen < ULOC_KEYWORDS_CAPACITY) {
927             destination.clear().append(calendarType, -1, err);
928             if (U_FAILURE(err)) { return; }
929         }
930     }
931 }
932 
933 void
consumeShortTimePattern(const UnicodeString & shortTimePattern,UErrorCode & status)934 DateTimePatternGenerator::consumeShortTimePattern(const UnicodeString& shortTimePattern,
935         UErrorCode& status) {
936     if (U_FAILURE(status)) { return; }
937     // ICU-20383 No longer set fDefaultHourFormatChar to the hour format character from
938     // this pattern; instead it is set from localeToAllowedHourFormatsMap which now
939     // includes entries for both preferred and allowed formats.
940 
941     // HACK for hh:ss
942     hackTimes(shortTimePattern, status);
943 }
944 
945 struct DateTimePatternGenerator::AppendItemFormatsSink : public ResourceSink {
946 
947     // Destination for data, modified via setters.
948     DateTimePatternGenerator& dtpg;
949 
AppendItemFormatsSinkDateTimePatternGenerator::AppendItemFormatsSink950     AppendItemFormatsSink(DateTimePatternGenerator& _dtpg) : dtpg(_dtpg) {}
951     virtual ~AppendItemFormatsSink();
952 
putDateTimePatternGenerator::AppendItemFormatsSink953     virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
954             UErrorCode &errorCode) override {
955         UDateTimePatternField field = dtpg.getAppendFormatNumber(key);
956         if (field == UDATPG_FIELD_COUNT) { return; }
957         const UnicodeString& valueStr = value.getUnicodeString(errorCode);
958         if (dtpg.getAppendItemFormat(field).isEmpty() && !valueStr.isEmpty()) {
959             dtpg.setAppendItemFormat(field, valueStr);
960         }
961     }
962 
fillInMissingDateTimePatternGenerator::AppendItemFormatsSink963     void fillInMissing() {
964         UnicodeString defaultItemFormat(true, UDATPG_ItemFormat, UPRV_LENGTHOF(UDATPG_ItemFormat)-1);  // Read-only alias.
965         for (int32_t i = 0; i < UDATPG_FIELD_COUNT; i++) {
966             UDateTimePatternField field = (UDateTimePatternField)i;
967             if (dtpg.getAppendItemFormat(field).isEmpty()) {
968                 dtpg.setAppendItemFormat(field, defaultItemFormat);
969             }
970         }
971     }
972 };
973 
974 struct DateTimePatternGenerator::AppendItemNamesSink : public ResourceSink {
975 
976     // Destination for data, modified via setters.
977     DateTimePatternGenerator& dtpg;
978 
AppendItemNamesSinkDateTimePatternGenerator::AppendItemNamesSink979     AppendItemNamesSink(DateTimePatternGenerator& _dtpg) : dtpg(_dtpg) {}
980     virtual ~AppendItemNamesSink();
981 
putDateTimePatternGenerator::AppendItemNamesSink982     virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
983             UErrorCode &errorCode) override {
984         UDateTimePGDisplayWidth width;
985         UDateTimePatternField field = dtpg.getFieldAndWidthIndices(key, &width);
986         if (field == UDATPG_FIELD_COUNT) { return; }
987         ResourceTable detailsTable = value.getTable(errorCode);
988         if (U_FAILURE(errorCode)) { return; }
989         if (!detailsTable.findValue("dn", value)) { return; }
990         const UnicodeString& valueStr = value.getUnicodeString(errorCode);
991         if (U_SUCCESS(errorCode) && dtpg.getFieldDisplayName(field,width).isEmpty() && !valueStr.isEmpty()) {
992             dtpg.setFieldDisplayName(field,width,valueStr);
993         }
994     }
995 
fillInMissingDateTimePatternGenerator::AppendItemNamesSink996     void fillInMissing() {
997         for (int32_t i = 0; i < UDATPG_FIELD_COUNT; i++) {
998             UnicodeString& valueStr = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, UDATPG_WIDE);
999             if (valueStr.isEmpty()) {
1000                 valueStr = CAP_F;
1001                 U_ASSERT(i < 20);
1002                 if (i < 10) {
1003                     // F0, F1, ..., F9
1004                     valueStr += (UChar)(i+0x30);
1005                 } else {
1006                     // F10, F11, ...
1007                     valueStr += (UChar)0x31;
1008                     valueStr += (UChar)(i-10 + 0x30);
1009                 }
1010                 // NUL-terminate for the C API.
1011                 valueStr.getTerminatedBuffer();
1012             }
1013             for (int32_t j = 1; j < UDATPG_WIDTH_COUNT; j++) {
1014                 UnicodeString& valueStr2 = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)j);
1015                 if (valueStr2.isEmpty()) {
1016                     valueStr2 = dtpg.getFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)(j-1));
1017                 }
1018             }
1019         }
1020     }
1021 };
1022 
1023 struct DateTimePatternGenerator::AvailableFormatsSink : public ResourceSink {
1024 
1025     // Destination for data, modified via setters.
1026     DateTimePatternGenerator& dtpg;
1027 
1028     // Temporary variable, required for calling addPatternWithSkeleton.
1029     UnicodeString conflictingPattern;
1030 
AvailableFormatsSinkDateTimePatternGenerator::AvailableFormatsSink1031     AvailableFormatsSink(DateTimePatternGenerator& _dtpg) : dtpg(_dtpg) {}
1032     virtual ~AvailableFormatsSink();
1033 
putDateTimePatternGenerator::AvailableFormatsSink1034     virtual void put(const char *key, ResourceValue &value, UBool isRoot,
1035             UErrorCode &errorCode) override {
1036         const UnicodeString formatKey(key, -1, US_INV);
1037         if (!dtpg.isAvailableFormatSet(formatKey) ) {
1038             dtpg.setAvailableFormat(formatKey, errorCode);
1039             // Add pattern with its associated skeleton. Override any duplicate
1040             // derived from std patterns, but not a previous availableFormats entry:
1041             const UnicodeString& formatValue = value.getUnicodeString(errorCode);
1042             conflictingPattern.remove();
1043             dtpg.addPatternWithSkeleton(formatValue, &formatKey, !isRoot, conflictingPattern, errorCode);
1044         }
1045     }
1046 };
1047 
1048 // Virtual destructors must be defined out of line.
~AppendItemFormatsSink()1049 DateTimePatternGenerator::AppendItemFormatsSink::~AppendItemFormatsSink() {}
~AppendItemNamesSink()1050 DateTimePatternGenerator::AppendItemNamesSink::~AppendItemNamesSink() {}
~AvailableFormatsSink()1051 DateTimePatternGenerator::AvailableFormatsSink::~AvailableFormatsSink() {}
1052 
1053 void
addCLDRData(const Locale & locale,UErrorCode & errorCode)1054 DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& errorCode) {
1055     if (U_FAILURE(errorCode)) { return; }
1056     UnicodeString rbPattern, value, field;
1057     CharString path;
1058 
1059     LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &errorCode));
1060     if (U_FAILURE(errorCode)) { return; }
1061 
1062     CharString calendarTypeToUse; // to be filled in with the type to use, if all goes well
1063     getCalendarTypeToUse(locale, calendarTypeToUse, errorCode);
1064     if (U_FAILURE(errorCode)) { return; }
1065 
1066     // Local err to ignore resource not found exceptions
1067     UErrorCode err = U_ZERO_ERROR;
1068 
1069     // Load append item formats.
1070     AppendItemFormatsSink appendItemFormatsSink(*this);
1071     path.clear()
1072         .append(DT_DateTimeCalendarTag, errorCode)
1073         .append('/', errorCode)
1074         .append(calendarTypeToUse, errorCode)
1075         .append('/', errorCode)
1076         .append(DT_DateTimeAppendItemsTag, errorCode); // i.e., calendar/xxx/appendItems
1077     if (U_FAILURE(errorCode)) { return; }
1078     ures_getAllChildrenWithFallback(rb.getAlias(), path.data(), appendItemFormatsSink, err);
1079     appendItemFormatsSink.fillInMissing();
1080 
1081     // Load CLDR item names.
1082     err = U_ZERO_ERROR;
1083     AppendItemNamesSink appendItemNamesSink(*this);
1084     ures_getAllChildrenWithFallback(rb.getAlias(), DT_DateTimeFieldsTag, appendItemNamesSink, err);
1085     appendItemNamesSink.fillInMissing();
1086 
1087     // Load the available formats from CLDR.
1088     err = U_ZERO_ERROR;
1089     initHashtable(errorCode);
1090     if (U_FAILURE(errorCode)) { return; }
1091     AvailableFormatsSink availableFormatsSink(*this);
1092     path.clear()
1093         .append(DT_DateTimeCalendarTag, errorCode)
1094         .append('/', errorCode)
1095         .append(calendarTypeToUse, errorCode)
1096         .append('/', errorCode)
1097         .append(DT_DateTimeAvailableFormatsTag, errorCode); // i.e., calendar/xxx/availableFormats
1098     if (U_FAILURE(errorCode)) { return; }
1099     ures_getAllChildrenWithFallback(rb.getAlias(), path.data(), availableFormatsSink, err);
1100 }
1101 
1102 void
initHashtable(UErrorCode & err)1103 DateTimePatternGenerator::initHashtable(UErrorCode& err) {
1104     if (U_FAILURE(err)) { return; }
1105     if (fAvailableFormatKeyHash!=nullptr) {
1106         return;
1107     }
1108     LocalPointer<Hashtable> hash(new Hashtable(false, err), err);
1109     if (U_SUCCESS(err)) {
1110         fAvailableFormatKeyHash = hash.orphan();
1111     }
1112 }
1113 
1114 void
setAppendItemFormat(UDateTimePatternField field,const UnicodeString & value)1115 DateTimePatternGenerator::setAppendItemFormat(UDateTimePatternField field, const UnicodeString& value) {
1116     appendItemFormats[field] = value;
1117     // NUL-terminate for the C API.
1118     appendItemFormats[field].getTerminatedBuffer();
1119 }
1120 
1121 const UnicodeString&
getAppendItemFormat(UDateTimePatternField field) const1122 DateTimePatternGenerator::getAppendItemFormat(UDateTimePatternField field) const {
1123     return appendItemFormats[field];
1124 }
1125 
1126 void
setAppendItemName(UDateTimePatternField field,const UnicodeString & value)1127 DateTimePatternGenerator::setAppendItemName(UDateTimePatternField field, const UnicodeString& value) {
1128     setFieldDisplayName(field, UDATPG_WIDTH_APPENDITEM, value);
1129 }
1130 
1131 const UnicodeString&
getAppendItemName(UDateTimePatternField field) const1132 DateTimePatternGenerator::getAppendItemName(UDateTimePatternField field) const {
1133     return fieldDisplayNames[field][UDATPG_WIDTH_APPENDITEM];
1134 }
1135 
1136 void
setFieldDisplayName(UDateTimePatternField field,UDateTimePGDisplayWidth width,const UnicodeString & value)1137 DateTimePatternGenerator::setFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width, const UnicodeString& value) {
1138     fieldDisplayNames[field][width] = value;
1139     // NUL-terminate for the C API.
1140     fieldDisplayNames[field][width].getTerminatedBuffer();
1141 }
1142 
1143 UnicodeString
getFieldDisplayName(UDateTimePatternField field,UDateTimePGDisplayWidth width) const1144 DateTimePatternGenerator::getFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) const {
1145     return fieldDisplayNames[field][width];
1146 }
1147 
1148 UnicodeString&
getMutableFieldDisplayName(UDateTimePatternField field,UDateTimePGDisplayWidth width)1149 DateTimePatternGenerator::getMutableFieldDisplayName(UDateTimePatternField field, UDateTimePGDisplayWidth width) {
1150     return fieldDisplayNames[field][width];
1151 }
1152 
1153 void
getAppendName(UDateTimePatternField field,UnicodeString & value)1154 DateTimePatternGenerator::getAppendName(UDateTimePatternField field, UnicodeString& value) {
1155     value = SINGLE_QUOTE;
1156     value += fieldDisplayNames[field][UDATPG_WIDTH_APPENDITEM];
1157     value += SINGLE_QUOTE;
1158 }
1159 
1160 UnicodeString
getBestPattern(const UnicodeString & patternForm,UErrorCode & status)1161 DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UErrorCode& status) {
1162     return getBestPattern(patternForm, UDATPG_MATCH_NO_OPTIONS, status);
1163 }
1164 
1165 UnicodeString
getBestPattern(const UnicodeString & patternForm,UDateTimePatternMatchOptions options,UErrorCode & status)1166 DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UDateTimePatternMatchOptions options, UErrorCode& status) {
1167     if (U_FAILURE(status)) {
1168         return UnicodeString();
1169     }
1170     if (U_FAILURE(internalErrorCode)) {
1171         status = internalErrorCode;
1172         return UnicodeString();
1173     }
1174     const UnicodeString *bestPattern = nullptr;
1175     UnicodeString dtFormat;
1176     UnicodeString resultPattern;
1177     int32_t flags = kDTPGNoFlags;
1178 
1179     int32_t dateMask=(1<<UDATPG_DAYPERIOD_FIELD) - 1;
1180     int32_t timeMask=(1<<UDATPG_FIELD_COUNT) - 1 - dateMask;
1181 
1182     // Replace hour metacharacters 'j', 'C' and 'J', set flags as necessary
1183     UnicodeString patternFormMapped = mapSkeletonMetacharacters(patternForm, &flags, status);
1184     if (U_FAILURE(status)) {
1185         return UnicodeString();
1186     }
1187 
1188     resultPattern.remove();
1189     dtMatcher->set(patternFormMapped, fp);
1190     const PtnSkeleton* specifiedSkeleton = nullptr;
1191     bestPattern=getBestRaw(*dtMatcher, -1, distanceInfo, status, &specifiedSkeleton);
1192     if (U_FAILURE(status)) {
1193         return UnicodeString();
1194     }
1195 
1196     if ( distanceInfo->missingFieldMask==0 && distanceInfo->extraFieldMask==0 ) {
1197         resultPattern = adjustFieldTypes(*bestPattern, specifiedSkeleton, flags, options);
1198 
1199         return resultPattern;
1200     }
1201     int32_t neededFields = dtMatcher->getFieldMask();
1202     UnicodeString datePattern=getBestAppending(neededFields & dateMask, flags, status, options);
1203     UnicodeString timePattern=getBestAppending(neededFields & timeMask, flags, status, options);
1204     if (U_FAILURE(status)) {
1205         return UnicodeString();
1206     }
1207     if (datePattern.length()==0) {
1208         if (timePattern.length()==0) {
1209             resultPattern.remove();
1210         }
1211         else {
1212             return timePattern;
1213         }
1214     }
1215     if (timePattern.length()==0) {
1216         return datePattern;
1217     }
1218     resultPattern.remove();
1219     status = U_ZERO_ERROR;
1220     // determine which dateTimeFormat to use
1221     PtnSkeleton* reqSkeleton = dtMatcher->getSkeletonPtr();
1222     UDateFormatStyle style = UDAT_SHORT;
1223     int32_t monthFieldLen = reqSkeleton->baseOriginal.getFieldLength(UDATPG_MONTH_FIELD);
1224     if (monthFieldLen == 4) {
1225         if (reqSkeleton->baseOriginal.getFieldLength(UDATPG_WEEKDAY_FIELD) > 0) {
1226             style = UDAT_FULL;
1227         } else {
1228             style = UDAT_LONG;
1229         }
1230     } else if (monthFieldLen == 3) {
1231         style = UDAT_MEDIUM;
1232     }
1233     // and now use it to compose date and time
1234     dtFormat=getDateTimeFormat(style, status);
1235     SimpleFormatter(dtFormat, 2, 2, status).format(timePattern, datePattern, resultPattern, status);
1236     return resultPattern;
1237 }
1238 
1239 /*
1240  * Map a skeleton that may have metacharacters jJC to one without, by replacing
1241  * the metacharacters with locale-appropriate fields of h/H/k/K and of a/b/B
1242  * (depends on fDefaultHourFormatChar and fAllowedHourFormats being set, which in
1243  * turn depends on initData having been run). This method also updates the flags
1244  * as necessary. Returns the updated skeleton.
1245  */
1246 UnicodeString
mapSkeletonMetacharacters(const UnicodeString & patternForm,int32_t * flags,UErrorCode & status)1247 DateTimePatternGenerator::mapSkeletonMetacharacters(const UnicodeString& patternForm, int32_t* flags, UErrorCode& status) {
1248     UnicodeString patternFormMapped;
1249     patternFormMapped.remove();
1250     UBool inQuoted = false;
1251     int32_t patPos, patLen = patternForm.length();
1252     for (patPos = 0; patPos < patLen; patPos++) {
1253         UChar patChr = patternForm.charAt(patPos);
1254         if (patChr == SINGLE_QUOTE) {
1255             inQuoted = !inQuoted;
1256         } else if (!inQuoted) {
1257             // Handle special mappings for 'j' and 'C' in which fields lengths
1258             // 1,3,5 => hour field length 1
1259             // 2,4,6 => hour field length 2
1260             // 1,2 => abbreviated dayPeriod (field length 1..3)
1261             // 3,4 => long dayPeriod (field length 4)
1262             // 5,6 => narrow dayPeriod (field length 5)
1263             if (patChr == LOW_J || patChr == CAP_C) {
1264                 int32_t extraLen = 0; // 1 less than total field length
1265                 while (patPos+1 < patLen && patternForm.charAt(patPos+1)==patChr) {
1266                     extraLen++;
1267                     patPos++;
1268                 }
1269                 int32_t hourLen = 1 + (extraLen & 1);
1270                 int32_t dayPeriodLen = (extraLen < 2)? 1: 3 + (extraLen >> 1);
1271                 UChar hourChar = LOW_H;
1272                 UChar dayPeriodChar = LOW_A;
1273                 if (patChr == LOW_J) {
1274                     hourChar = fDefaultHourFormatChar;
1275                 } else {
1276                     AllowedHourFormat bestAllowed;
1277                     if (fAllowedHourFormats[0] != ALLOWED_HOUR_FORMAT_UNKNOWN) {
1278                         bestAllowed = (AllowedHourFormat)fAllowedHourFormats[0];
1279                     } else {
1280                         status = U_INVALID_FORMAT_ERROR;
1281                         return UnicodeString();
1282                     }
1283                     if (bestAllowed == ALLOWED_HOUR_FORMAT_H || bestAllowed == ALLOWED_HOUR_FORMAT_HB || bestAllowed == ALLOWED_HOUR_FORMAT_Hb) {
1284                         hourChar = CAP_H;
1285                     } else if (bestAllowed == ALLOWED_HOUR_FORMAT_K || bestAllowed == ALLOWED_HOUR_FORMAT_KB || bestAllowed == ALLOWED_HOUR_FORMAT_Kb) {
1286                         hourChar = CAP_K;
1287                     } else if (bestAllowed == ALLOWED_HOUR_FORMAT_k) {
1288                         hourChar = LOW_K;
1289                     }
1290                     // in #13183 just add b/B to skeleton, no longer need to set special flags
1291                     if (bestAllowed == ALLOWED_HOUR_FORMAT_HB || bestAllowed == ALLOWED_HOUR_FORMAT_hB || bestAllowed == ALLOWED_HOUR_FORMAT_KB) {
1292                         dayPeriodChar = CAP_B;
1293                     } else if (bestAllowed == ALLOWED_HOUR_FORMAT_Hb || bestAllowed == ALLOWED_HOUR_FORMAT_hb || bestAllowed == ALLOWED_HOUR_FORMAT_Kb) {
1294                         dayPeriodChar = LOW_B;
1295                     }
1296                 }
1297                 if (hourChar==CAP_H || hourChar==LOW_K) {
1298                     dayPeriodLen = 0;
1299                 }
1300                 while (dayPeriodLen-- > 0) {
1301                     patternFormMapped.append(dayPeriodChar);
1302                 }
1303                 while (hourLen-- > 0) {
1304                     patternFormMapped.append(hourChar);
1305                 }
1306             } else if (patChr == CAP_J) {
1307                 // Get pattern for skeleton with H, then replace H or k
1308                 // with fDefaultHourFormatChar (if different)
1309                 patternFormMapped.append(CAP_H);
1310                 *flags |= kDTPGSkeletonUsesCapJ;
1311             } else {
1312                 patternFormMapped.append(patChr);
1313             }
1314         }
1315     }
1316     return patternFormMapped;
1317 }
1318 
1319 UnicodeString
replaceFieldTypes(const UnicodeString & pattern,const UnicodeString & skeleton,UErrorCode & status)1320 DateTimePatternGenerator::replaceFieldTypes(const UnicodeString& pattern,
1321                                             const UnicodeString& skeleton,
1322                                             UErrorCode& status) {
1323     return replaceFieldTypes(pattern, skeleton, UDATPG_MATCH_NO_OPTIONS, status);
1324 }
1325 
1326 UnicodeString
replaceFieldTypes(const UnicodeString & pattern,const UnicodeString & skeleton,UDateTimePatternMatchOptions options,UErrorCode & status)1327 DateTimePatternGenerator::replaceFieldTypes(const UnicodeString& pattern,
1328                                             const UnicodeString& skeleton,
1329                                             UDateTimePatternMatchOptions options,
1330                                             UErrorCode& status) {
1331     if (U_FAILURE(status)) {
1332         return UnicodeString();
1333     }
1334     if (U_FAILURE(internalErrorCode)) {
1335         status = internalErrorCode;
1336         return UnicodeString();
1337     }
1338     dtMatcher->set(skeleton, fp);
1339     UnicodeString result = adjustFieldTypes(pattern, nullptr, kDTPGNoFlags, options);
1340     return result;
1341 }
1342 
1343 void
setDecimal(const UnicodeString & newDecimal)1344 DateTimePatternGenerator::setDecimal(const UnicodeString& newDecimal) {
1345     this->decimal = newDecimal;
1346     // NUL-terminate for the C API.
1347     this->decimal.getTerminatedBuffer();
1348 }
1349 
1350 const UnicodeString&
getDecimal() const1351 DateTimePatternGenerator::getDecimal() const {
1352     return decimal;
1353 }
1354 
1355 void
addCanonicalItems(UErrorCode & status)1356 DateTimePatternGenerator::addCanonicalItems(UErrorCode& status) {
1357     if (U_FAILURE(status)) { return; }
1358     UnicodeString  conflictingPattern;
1359 
1360     for (int32_t i=0; i<UDATPG_FIELD_COUNT; i++) {
1361         if (Canonical_Items[i] > 0) {
1362             addPattern(UnicodeString(Canonical_Items[i]), false, conflictingPattern, status);
1363         }
1364         if (U_FAILURE(status)) { return; }
1365     }
1366 }
1367 
1368 void
setDateTimeFormat(const UnicodeString & dtFormat)1369 DateTimePatternGenerator::setDateTimeFormat(const UnicodeString& dtFormat) {
1370     UErrorCode status = U_ZERO_ERROR;
1371     for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) {
1372         setDateTimeFormat((UDateFormatStyle)style, dtFormat, status);
1373     }
1374 }
1375 
1376 const UnicodeString&
getDateTimeFormat() const1377 DateTimePatternGenerator::getDateTimeFormat() const {
1378     UErrorCode status = U_ZERO_ERROR;
1379     return getDateTimeFormat(UDAT_MEDIUM, status);
1380 }
1381 
1382 void
setDateTimeFormat(UDateFormatStyle style,const UnicodeString & dtFormat,UErrorCode & status)1383 DateTimePatternGenerator::setDateTimeFormat(UDateFormatStyle style, const UnicodeString& dtFormat, UErrorCode& status) {
1384     if (U_FAILURE(status)) {
1385         return;
1386     }
1387     if (style < UDAT_FULL || style > UDAT_SHORT) {
1388         status = U_ILLEGAL_ARGUMENT_ERROR;
1389         return;
1390     }
1391     dateTimeFormat[style] = dtFormat;
1392     // Note for the following: getTerminatedBuffer() can re-allocate the UnicodeString
1393     // buffer so we do this here before clients request a const ref to the UnicodeString
1394     // or its buffer.
1395     dateTimeFormat[style].getTerminatedBuffer(); // NUL-terminate for the C API.
1396 }
1397 
1398 const UnicodeString&
getDateTimeFormat(UDateFormatStyle style,UErrorCode & status) const1399 DateTimePatternGenerator::getDateTimeFormat(UDateFormatStyle style, UErrorCode& status) const {
1400     static const UnicodeString emptyString = UNICODE_STRING_SIMPLE("");
1401     if (U_FAILURE(status)) {
1402         return emptyString;
1403     }
1404     if (style < UDAT_FULL || style > UDAT_SHORT) {
1405         status = U_ILLEGAL_ARGUMENT_ERROR;
1406         return emptyString;
1407     }
1408     return dateTimeFormat[style];
1409 }
1410 
1411 static const int32_t cTypeBufMax = 32;
1412 
1413 void
setDateTimeFromCalendar(const Locale & locale,UErrorCode & status)1414 DateTimePatternGenerator::setDateTimeFromCalendar(const Locale& locale, UErrorCode& status) {
1415     if (U_FAILURE(status)) { return; }
1416 
1417     const UChar *resStr;
1418     int32_t resStrLen = 0;
1419 
1420     LocalUResourceBundlePointer calData(ures_open(nullptr, locale.getBaseName(), &status));
1421     if (U_FAILURE(status)) { return; }
1422     ures_getByKey(calData.getAlias(), DT_DateTimeCalendarTag, calData.getAlias(), &status);
1423     if (U_FAILURE(status)) { return; }
1424 
1425     char cType[cTypeBufMax + 1];
1426     Calendar::getCalendarTypeFromLocale(locale, cType, cTypeBufMax, status);
1427     cType[cTypeBufMax] = 0;
1428     if (U_FAILURE(status) || cType[0] == 0) {
1429         status = U_ZERO_ERROR;
1430         uprv_strcpy(cType, DT_DateTimeGregorianTag);
1431     }
1432     UBool cTypeIsGregorian = (uprv_strcmp(cType, DT_DateTimeGregorianTag) == 0);
1433 
1434     // Currently, for compatibility with pre-CLDR-42 data, we default to the "atTime"
1435     // combining patterns. Depending on guidance in CLDR 42 spec and on DisplayOptions,
1436     // we may change this.
1437     LocalUResourceBundlePointer specificCalBundle;
1438     LocalUResourceBundlePointer dateTimePatterns;
1439     int32_t dateTimeOffset = 0; // initially for DateTimePatterns%atTime
1440     if (!cTypeIsGregorian) {
1441         specificCalBundle.adoptInstead(ures_getByKeyWithFallback(calData.getAlias(), cType,
1442                                         nullptr, &status));
1443         dateTimePatterns.adoptInstead(ures_getByKeyWithFallback(specificCalBundle.getAlias(), DT_DateAtTimePatternsTag, // the %atTime variant, 4 entries
1444                                         nullptr, &status));
1445     }
1446     if (dateTimePatterns.isNull() || status == U_MISSING_RESOURCE_ERROR) {
1447         status = U_ZERO_ERROR;
1448         specificCalBundle.adoptInstead(ures_getByKeyWithFallback(calData.getAlias(), DT_DateTimeGregorianTag,
1449                                         nullptr, &status));
1450         dateTimePatterns.adoptInstead(ures_getByKeyWithFallback(specificCalBundle.getAlias(), DT_DateAtTimePatternsTag, // the %atTime variant, 4 entries
1451                                         nullptr, &status));
1452     }
1453     if (U_SUCCESS(status) && (ures_getSize(dateTimePatterns.getAlias()) < 4)) {
1454         status = U_INVALID_FORMAT_ERROR;
1455     }
1456     if (status == U_MISSING_RESOURCE_ERROR) {
1457         // Try again with standard variant
1458         status = U_ZERO_ERROR;
1459         dateTimePatterns.orphan();
1460         dateTimeOffset = (int32_t)DateFormat::kDateTimeOffset;
1461         if (!cTypeIsGregorian) {
1462             specificCalBundle.adoptInstead(ures_getByKeyWithFallback(calData.getAlias(), cType,
1463                                             nullptr, &status));
1464             dateTimePatterns.adoptInstead(ures_getByKeyWithFallback(specificCalBundle.getAlias(), DT_DateTimePatternsTag, // the standard variant, 13 entries
1465                                             nullptr, &status));
1466         }
1467         if (dateTimePatterns.isNull() || status == U_MISSING_RESOURCE_ERROR) {
1468             status = U_ZERO_ERROR;
1469             specificCalBundle.adoptInstead(ures_getByKeyWithFallback(calData.getAlias(), DT_DateTimeGregorianTag,
1470                                             nullptr, &status));
1471             dateTimePatterns.adoptInstead(ures_getByKeyWithFallback(specificCalBundle.getAlias(), DT_DateTimePatternsTag, // the standard variant, 13 entries
1472                                             nullptr, &status));
1473         }
1474         if (U_SUCCESS(status) && (ures_getSize(dateTimePatterns.getAlias()) <= DateFormat::kDateTimeOffset + DateFormat::kShort)) {
1475             status = U_INVALID_FORMAT_ERROR;
1476         }
1477     }
1478     if (U_FAILURE(status)) { return; }
1479     for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) {
1480         resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), dateTimeOffset + style, &resStrLen, &status);
1481         setDateTimeFormat((UDateFormatStyle)style, UnicodeString(true, resStr, resStrLen), status);
1482     }
1483 }
1484 
1485 void
setDecimalSymbols(const Locale & locale,UErrorCode & status)1486 DateTimePatternGenerator::setDecimalSymbols(const Locale& locale, UErrorCode& status) {
1487     DecimalFormatSymbols dfs = DecimalFormatSymbols(locale, status);
1488     if(U_SUCCESS(status)) {
1489         decimal = dfs.getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
1490         // NUL-terminate for the C API.
1491         decimal.getTerminatedBuffer();
1492     }
1493 }
1494 
1495 UDateTimePatternConflict
addPattern(const UnicodeString & pattern,UBool override,UnicodeString & conflictingPattern,UErrorCode & status)1496 DateTimePatternGenerator::addPattern(
1497     const UnicodeString& pattern,
1498     UBool override,
1499     UnicodeString &conflictingPattern,
1500     UErrorCode& status)
1501 {
1502     if (U_FAILURE(internalErrorCode)) {
1503         status = internalErrorCode;
1504         return UDATPG_NO_CONFLICT;
1505     }
1506 
1507     return addPatternWithSkeleton(pattern, nullptr, override, conflictingPattern, status);
1508 }
1509 
1510 // For DateTimePatternGenerator::addPatternWithSkeleton -
1511 // If skeletonToUse is specified, then an availableFormats entry is being added. In this case:
1512 // 1. We pass that skeleton to matcher.set instead of having it derive a skeleton from the pattern.
1513 // 2. If the new entry's skeleton or basePattern does match an existing entry but that entry also had a skeleton specified
1514 // (i.e. it was also from availableFormats), then the new entry does not override it regardless of the value of the override
1515 // parameter. This prevents later availableFormats entries from a parent locale overriding earlier ones from the actual
1516 // specified locale. However, availableFormats entries *should* override entries with matching skeleton whose skeleton was
1517 // derived (i.e. entries derived from the standard date/time patters for the specified locale).
1518 // 3. When adding the pattern (patternMap->add), we set a new boolean to indicate that the added entry had a
1519 // specified skeleton (which sets a new field in the PtnElem in the PatternMap).
1520 UDateTimePatternConflict
addPatternWithSkeleton(const UnicodeString & pattern,const UnicodeString * skeletonToUse,UBool override,UnicodeString & conflictingPattern,UErrorCode & status)1521 DateTimePatternGenerator::addPatternWithSkeleton(
1522     const UnicodeString& pattern,
1523     const UnicodeString* skeletonToUse,
1524     UBool override,
1525     UnicodeString& conflictingPattern,
1526     UErrorCode& status)
1527 {
1528     if (U_FAILURE(internalErrorCode)) {
1529         status = internalErrorCode;
1530         return UDATPG_NO_CONFLICT;
1531     }
1532 
1533     UnicodeString basePattern;
1534     PtnSkeleton   skeleton;
1535     UDateTimePatternConflict conflictingStatus = UDATPG_NO_CONFLICT;
1536 
1537     DateTimeMatcher matcher;
1538     if ( skeletonToUse == nullptr ) {
1539         matcher.set(pattern, fp, skeleton);
1540         matcher.getBasePattern(basePattern);
1541     } else {
1542         matcher.set(*skeletonToUse, fp, skeleton); // no longer trims skeleton fields to max len 3, per #7930
1543         matcher.getBasePattern(basePattern); // or perhaps instead: basePattern = *skeletonToUse;
1544     }
1545     // We only care about base conflicts - and replacing the pattern associated with a base - if:
1546     // 1. the conflicting previous base pattern did *not* have an explicit skeleton; in that case the previous
1547     // base + pattern combination was derived from either (a) a canonical item, (b) a standard format, or
1548     // (c) a pattern specified programmatically with a previous call to addPattern (which would only happen
1549     // if we are getting here from a subsequent call to addPattern).
1550     // 2. a skeleton is specified for the current pattern, but override=false; in that case we are checking
1551     // availableFormats items from root, which should not override any previous entry with the same base.
1552     UBool entryHadSpecifiedSkeleton;
1553     const UnicodeString *duplicatePattern = patternMap->getPatternFromBasePattern(basePattern, entryHadSpecifiedSkeleton);
1554     if (duplicatePattern != nullptr && (!entryHadSpecifiedSkeleton || (skeletonToUse != nullptr && !override))) {
1555         conflictingStatus = UDATPG_BASE_CONFLICT;
1556         conflictingPattern = *duplicatePattern;
1557         if (!override) {
1558             return conflictingStatus;
1559         }
1560     }
1561     // The only time we get here with override=true and skeletonToUse!=null is when adding availableFormats
1562     // items from CLDR data. In that case, we don't want an item from a parent locale to replace an item with
1563     // same skeleton from the specified locale, so skip the current item if skeletonWasSpecified is true for
1564     // the previously-specified conflicting item.
1565     const PtnSkeleton* entrySpecifiedSkeleton = nullptr;
1566     duplicatePattern = patternMap->getPatternFromSkeleton(skeleton, &entrySpecifiedSkeleton);
1567     if (duplicatePattern != nullptr ) {
1568         conflictingStatus = UDATPG_CONFLICT;
1569         conflictingPattern = *duplicatePattern;
1570         if (!override || (skeletonToUse != nullptr && entrySpecifiedSkeleton != nullptr)) {
1571             return conflictingStatus;
1572         }
1573     }
1574     patternMap->add(basePattern, skeleton, pattern, skeletonToUse != nullptr, status);
1575     if(U_FAILURE(status)) {
1576         return conflictingStatus;
1577     }
1578 
1579     return UDATPG_NO_CONFLICT;
1580 }
1581 
1582 
1583 UDateTimePatternField
getAppendFormatNumber(const char * field) const1584 DateTimePatternGenerator::getAppendFormatNumber(const char* field) const {
1585     for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i ) {
1586         if (uprv_strcmp(CLDR_FIELD_APPEND[i], field)==0) {
1587             return (UDateTimePatternField)i;
1588         }
1589     }
1590     return UDATPG_FIELD_COUNT;
1591 }
1592 
1593 UDateTimePatternField
getFieldAndWidthIndices(const char * key,UDateTimePGDisplayWidth * widthP) const1594 DateTimePatternGenerator::getFieldAndWidthIndices(const char* key, UDateTimePGDisplayWidth* widthP) const {
1595     char cldrFieldKey[UDATPG_FIELD_KEY_MAX + 1];
1596     uprv_strncpy(cldrFieldKey, key, UDATPG_FIELD_KEY_MAX);
1597     cldrFieldKey[UDATPG_FIELD_KEY_MAX]=0; // ensure termination
1598     *widthP = UDATPG_WIDE;
1599     char* hyphenPtr = uprv_strchr(cldrFieldKey, '-');
1600     if (hyphenPtr) {
1601         for (int32_t i=UDATPG_WIDTH_COUNT-1; i>0; --i) {
1602             if (uprv_strcmp(CLDR_FIELD_WIDTH[i], hyphenPtr)==0) {
1603                 *widthP=(UDateTimePGDisplayWidth)i;
1604                 break;
1605             }
1606         }
1607         *hyphenPtr = 0; // now delete width portion of key
1608     }
1609     for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i ) {
1610         if (uprv_strcmp(CLDR_FIELD_NAME[i],cldrFieldKey)==0) {
1611             return (UDateTimePatternField)i;
1612         }
1613     }
1614     return UDATPG_FIELD_COUNT;
1615 }
1616 
1617 const UnicodeString*
getBestRaw(DateTimeMatcher & source,int32_t includeMask,DistanceInfo * missingFields,UErrorCode & status,const PtnSkeleton ** specifiedSkeletonPtr)1618 DateTimePatternGenerator::getBestRaw(DateTimeMatcher& source,
1619                                      int32_t includeMask,
1620                                      DistanceInfo* missingFields,
1621                                      UErrorCode &status,
1622                                      const PtnSkeleton** specifiedSkeletonPtr) {
1623     int32_t bestDistance = 0x7fffffff;
1624     int32_t bestMissingFieldMask = -1;
1625     DistanceInfo tempInfo;
1626     const UnicodeString *bestPattern=nullptr;
1627     const PtnSkeleton* specifiedSkeleton=nullptr;
1628 
1629     PatternMapIterator it(status);
1630     if (U_FAILURE(status)) { return nullptr; }
1631 
1632     for (it.set(*patternMap); it.hasNext(); ) {
1633         DateTimeMatcher trial = it.next();
1634         if (trial.equals(skipMatcher)) {
1635             continue;
1636         }
1637         int32_t distance=source.getDistance(trial, includeMask, tempInfo);
1638         // Because we iterate over a map the order is undefined. Can change between implementations,
1639         // versions, and will very likely be different between Java and C/C++.
1640         // So if we have patterns with the same distance we also look at the missingFieldMask,
1641         // and we favour the smallest one. Because the field is a bitmask this technically means we
1642         // favour differences in the "least significant fields". For example we prefer the one with differences
1643         // in seconds field vs one with difference in the hours field.
1644         if (distance<bestDistance || (distance==bestDistance && bestMissingFieldMask<tempInfo.missingFieldMask)) {
1645             bestDistance=distance;
1646             bestMissingFieldMask=tempInfo.missingFieldMask;
1647             bestPattern=patternMap->getPatternFromSkeleton(*trial.getSkeletonPtr(), &specifiedSkeleton);
1648             missingFields->setTo(tempInfo);
1649             if (distance==0) {
1650                 break;
1651             }
1652         }
1653     }
1654 
1655     // If the best raw match had a specified skeleton and that skeleton was requested by the caller,
1656     // then return it too. This generally happens when the caller needs to pass that skeleton
1657     // through to adjustFieldTypes so the latter can do a better job.
1658     if (bestPattern && specifiedSkeletonPtr) {
1659         *specifiedSkeletonPtr = specifiedSkeleton;
1660     }
1661     return bestPattern;
1662 }
1663 
1664 UnicodeString
adjustFieldTypes(const UnicodeString & pattern,const PtnSkeleton * specifiedSkeleton,int32_t flags,UDateTimePatternMatchOptions options)1665 DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern,
1666                                            const PtnSkeleton* specifiedSkeleton,
1667                                            int32_t flags,
1668                                            UDateTimePatternMatchOptions options) {
1669     UnicodeString newPattern;
1670     fp->set(pattern);
1671     for (int32_t i=0; i < fp->itemNumber; i++) {
1672         UnicodeString field = fp->items[i];
1673         if ( fp->isQuoteLiteral(field) ) {
1674 
1675             UnicodeString quoteLiteral;
1676             fp->getQuoteLiteral(quoteLiteral, &i);
1677             newPattern += quoteLiteral;
1678         }
1679         else {
1680             if (fp->isPatternSeparator(field)) {
1681                 newPattern+=field;
1682                 continue;
1683             }
1684             int32_t canonicalIndex = fp->getCanonicalIndex(field);
1685             if (canonicalIndex < 0) {
1686                 newPattern+=field;
1687                 continue;  // don't adjust
1688             }
1689             const dtTypeElem *row = &dtTypes[canonicalIndex];
1690             int32_t typeValue = row->field;
1691 
1692             // handle day periods - with #13183, no longer need special handling here, integrated with normal types
1693 
1694             if ((flags & kDTPGFixFractionalSeconds) != 0 && typeValue == UDATPG_SECOND_FIELD) {
1695                 field += decimal;
1696                 dtMatcher->skeleton.original.appendFieldTo(UDATPG_FRACTIONAL_SECOND_FIELD, field);
1697             } else if (dtMatcher->skeleton.type[typeValue]!=0) {
1698                     // Here:
1699                     // - "reqField" is the field from the originally requested skeleton after replacement
1700                     // of metacharacters 'j', 'C' and 'J', with length "reqFieldLen".
1701                     // - "field" is the field from the found pattern.
1702                     //
1703                     // The adjusted field should consist of characters from the originally requested
1704                     // skeleton, except in the case of UDATPG_MONTH_FIELD or
1705                     // UDATPG_WEEKDAY_FIELD or UDATPG_YEAR_FIELD, in which case it should consist
1706                     // of characters from the found pattern. In some cases of UDATPG_HOUR_FIELD,
1707                     // there is adjustment following the "defaultHourFormatChar". There is explanation
1708                     // how it is done below.
1709                     //
1710                     // The length of the adjusted field (adjFieldLen) should match that in the originally
1711                     // requested skeleton, except that in the following cases the length of the adjusted field
1712                     // should match that in the found pattern (i.e. the length of this pattern field should
1713                     // not be adjusted):
1714                     // 1. typeValue is UDATPG_HOUR_FIELD/MINUTE/SECOND and the corresponding bit in options is
1715                     //    not set (ticket #7180). Note, we may want to implement a similar change for other
1716                     //    numeric fields (MM, dd, etc.) so the default behavior is to get locale preference for
1717                     //    field length, but options bits can be used to override this.
1718                     // 2. There is a specified skeleton for the found pattern and one of the following is true:
1719                     //    a) The length of the field in the skeleton (skelFieldLen) is equal to reqFieldLen.
1720                     //    b) The pattern field is numeric and the skeleton field is not, or vice versa.
1721 
1722                     UChar reqFieldChar = dtMatcher->skeleton.original.getFieldChar(typeValue);
1723                     int32_t reqFieldLen = dtMatcher->skeleton.original.getFieldLength(typeValue);
1724                     if (reqFieldChar == CAP_E && reqFieldLen < 3)
1725                         reqFieldLen = 3; // 1-3 for E are equivalent to 3 for c,e
1726                     int32_t adjFieldLen = reqFieldLen;
1727                     if ( (typeValue==UDATPG_HOUR_FIELD && (options & UDATPG_MATCH_HOUR_FIELD_LENGTH)==0) ||
1728                          (typeValue==UDATPG_MINUTE_FIELD && (options & UDATPG_MATCH_MINUTE_FIELD_LENGTH)==0) ||
1729                          (typeValue==UDATPG_SECOND_FIELD && (options & UDATPG_MATCH_SECOND_FIELD_LENGTH)==0) ) {
1730                          adjFieldLen = field.length();
1731                     } else if (specifiedSkeleton && reqFieldChar != LOW_C && reqFieldChar != LOW_E) {
1732                         // (we skip this section for 'c' and 'e' because unlike the other characters considered in this function,
1733                         // they have no minimum field length-- 'E' and 'EE' are equivalent to 'EEE', but 'e' and 'ee' are not
1734                         // equivalent to 'eee' -- see the entries for "week day" in
1735                         // https://www.unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table for more info)
1736                         int32_t skelFieldLen = specifiedSkeleton->original.getFieldLength(typeValue);
1737                         UBool patFieldIsNumeric = (row->type > 0);
1738                         UBool skelFieldIsNumeric = (specifiedSkeleton->type[typeValue] > 0);
1739                         if (skelFieldLen == reqFieldLen || (patFieldIsNumeric && !skelFieldIsNumeric) || (skelFieldIsNumeric && !patFieldIsNumeric)) {
1740                             // don't adjust the field length in the found pattern
1741                             adjFieldLen = field.length();
1742                         }
1743                     }
1744                     UChar c = (typeValue!= UDATPG_HOUR_FIELD
1745                             && typeValue!= UDATPG_MONTH_FIELD
1746                             && typeValue!= UDATPG_WEEKDAY_FIELD
1747                             && (typeValue!= UDATPG_YEAR_FIELD || reqFieldChar==CAP_Y))
1748                             ? reqFieldChar
1749                             : field.charAt(0);
1750                     if (c == CAP_E && adjFieldLen < 3) {
1751                         c = LOW_E;
1752                     }
1753                     if (typeValue == UDATPG_HOUR_FIELD && fDefaultHourFormatChar != 0) {
1754                         // The adjustment here is required to match spec (https://www.unicode.org/reports/tr35/tr35-dates.html#dfst-hour).
1755                         // It is necessary to match the hour-cycle preferred by the Locale.
1756                         // Given that, we need to do the following adjustments:
1757                         // 1. When hour-cycle is h11 it should replace 'h' by 'K'.
1758                         // 2. When hour-cycle is h23 it should replace 'H' by 'k'.
1759                         // 3. When hour-cycle is h24 it should replace 'k' by 'H'.
1760                         // 4. When hour-cycle is h12 it should replace 'K' by 'h'.
1761 
1762                         if ((flags & kDTPGSkeletonUsesCapJ) != 0 || reqFieldChar == fDefaultHourFormatChar) {
1763                             c = fDefaultHourFormatChar;
1764                         } else if (reqFieldChar == LOW_H && fDefaultHourFormatChar == CAP_K) {
1765                             c = CAP_K;
1766                         } else if (reqFieldChar == CAP_H && fDefaultHourFormatChar == LOW_K) {
1767                             c = LOW_K;
1768                         } else if (reqFieldChar == LOW_K && fDefaultHourFormatChar == CAP_H) {
1769                             c = CAP_H;
1770                         } else if (reqFieldChar == CAP_K && fDefaultHourFormatChar == LOW_H) {
1771                             c = LOW_H;
1772                         }
1773                     }
1774 
1775                     field.remove();
1776                     for (int32_t j=adjFieldLen; j>0; --j) {
1777                         field += c;
1778                     }
1779             }
1780             newPattern+=field;
1781         }
1782     }
1783     return newPattern;
1784 }
1785 
1786 UnicodeString
getBestAppending(int32_t missingFields,int32_t flags,UErrorCode & status,UDateTimePatternMatchOptions options)1787 DateTimePatternGenerator::getBestAppending(int32_t missingFields, int32_t flags, UErrorCode &status, UDateTimePatternMatchOptions options) {
1788     if (U_FAILURE(status)) {
1789         return UnicodeString();
1790     }
1791     UnicodeString  resultPattern, tempPattern;
1792     const UnicodeString* tempPatternPtr;
1793     int32_t lastMissingFieldMask=0;
1794     if (missingFields!=0) {
1795         resultPattern=UnicodeString();
1796         const PtnSkeleton* specifiedSkeleton=nullptr;
1797         tempPatternPtr = getBestRaw(*dtMatcher, missingFields, distanceInfo, status, &specifiedSkeleton);
1798         if (U_FAILURE(status)) {
1799             return UnicodeString();
1800         }
1801         tempPattern = *tempPatternPtr;
1802         resultPattern = adjustFieldTypes(tempPattern, specifiedSkeleton, flags, options);
1803         if ( distanceInfo->missingFieldMask==0 ) {
1804             return resultPattern;
1805         }
1806         while (distanceInfo->missingFieldMask!=0) { // precondition: EVERY single field must work!
1807             if ( lastMissingFieldMask == distanceInfo->missingFieldMask ) {
1808                 break;  // cannot find the proper missing field
1809             }
1810             if (((distanceInfo->missingFieldMask & UDATPG_SECOND_AND_FRACTIONAL_MASK)==UDATPG_FRACTIONAL_MASK) &&
1811                 ((missingFields & UDATPG_SECOND_AND_FRACTIONAL_MASK) == UDATPG_SECOND_AND_FRACTIONAL_MASK)) {
1812                 resultPattern = adjustFieldTypes(resultPattern, specifiedSkeleton, flags | kDTPGFixFractionalSeconds, options);
1813                 distanceInfo->missingFieldMask &= ~UDATPG_FRACTIONAL_MASK;
1814                 continue;
1815             }
1816             int32_t startingMask = distanceInfo->missingFieldMask;
1817             tempPatternPtr = getBestRaw(*dtMatcher, distanceInfo->missingFieldMask, distanceInfo, status, &specifiedSkeleton);
1818             if (U_FAILURE(status)) {
1819                 return UnicodeString();
1820             }
1821             tempPattern = *tempPatternPtr;
1822             tempPattern = adjustFieldTypes(tempPattern, specifiedSkeleton, flags, options);
1823             int32_t foundMask=startingMask& ~distanceInfo->missingFieldMask;
1824             int32_t topField=getTopBitNumber(foundMask);
1825 
1826             if (appendItemFormats[topField].length() != 0) {
1827                 UnicodeString appendName;
1828                 getAppendName((UDateTimePatternField)topField, appendName);
1829                 const UnicodeString *values[3] = {
1830                     &resultPattern,
1831                     &tempPattern,
1832                     &appendName
1833                 };
1834                 SimpleFormatter(appendItemFormats[topField], 2, 3, status).
1835                     formatAndReplace(values, 3, resultPattern, nullptr, 0, status);
1836             }
1837             lastMissingFieldMask = distanceInfo->missingFieldMask;
1838         }
1839     }
1840     return resultPattern;
1841 }
1842 
1843 int32_t
getTopBitNumber(int32_t foundMask) const1844 DateTimePatternGenerator::getTopBitNumber(int32_t foundMask) const {
1845     if ( foundMask==0 ) {
1846         return 0;
1847     }
1848     int32_t i=0;
1849     while (foundMask!=0) {
1850         foundMask >>=1;
1851         ++i;
1852     }
1853     if (i-1 >UDATPG_ZONE_FIELD) {
1854         return UDATPG_ZONE_FIELD;
1855     }
1856     else
1857         return i-1;
1858 }
1859 
1860 void
setAvailableFormat(const UnicodeString & key,UErrorCode & err)1861 DateTimePatternGenerator::setAvailableFormat(const UnicodeString &key, UErrorCode& err)
1862 {
1863     fAvailableFormatKeyHash->puti(key, 1, err);
1864 }
1865 
1866 UBool
isAvailableFormatSet(const UnicodeString & key) const1867 DateTimePatternGenerator::isAvailableFormatSet(const UnicodeString &key) const {
1868     return (UBool)(fAvailableFormatKeyHash->geti(key) == 1);
1869 }
1870 
1871 void
copyHashtable(Hashtable * other,UErrorCode & status)1872 DateTimePatternGenerator::copyHashtable(Hashtable *other, UErrorCode &status) {
1873     if (other == nullptr || U_FAILURE(status)) {
1874         return;
1875     }
1876     if (fAvailableFormatKeyHash != nullptr) {
1877         delete fAvailableFormatKeyHash;
1878         fAvailableFormatKeyHash = nullptr;
1879     }
1880     initHashtable(status);
1881     if(U_FAILURE(status)){
1882         return;
1883     }
1884     int32_t pos = UHASH_FIRST;
1885     const UHashElement* elem = nullptr;
1886     // walk through the hash table and create a deep clone
1887     while((elem = other->nextElement(pos))!= nullptr){
1888         const UHashTok otherKeyTok = elem->key;
1889         UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
1890         fAvailableFormatKeyHash->puti(*otherKey, 1, status);
1891         if(U_FAILURE(status)){
1892             return;
1893         }
1894     }
1895 }
1896 
1897 StringEnumeration*
getSkeletons(UErrorCode & status) const1898 DateTimePatternGenerator::getSkeletons(UErrorCode& status) const {
1899     if (U_FAILURE(status)) {
1900         return nullptr;
1901     }
1902     if (U_FAILURE(internalErrorCode)) {
1903         status = internalErrorCode;
1904         return nullptr;
1905     }
1906     LocalPointer<StringEnumeration> skeletonEnumerator(
1907         new DTSkeletonEnumeration(*patternMap, DT_SKELETON, status), status);
1908 
1909     return U_SUCCESS(status) ? skeletonEnumerator.orphan() : nullptr;
1910 }
1911 
1912 const UnicodeString&
getPatternForSkeleton(const UnicodeString & skeleton) const1913 DateTimePatternGenerator::getPatternForSkeleton(const UnicodeString& skeleton) const {
1914     PtnElem *curElem;
1915 
1916     if (skeleton.length() ==0) {
1917         return emptyString;
1918     }
1919     curElem = patternMap->getHeader(skeleton.charAt(0));
1920     while ( curElem != nullptr ) {
1921         if ( curElem->skeleton->getSkeleton()==skeleton ) {
1922             return curElem->pattern;
1923         }
1924         curElem = curElem->next.getAlias();
1925     }
1926     return emptyString;
1927 }
1928 
1929 StringEnumeration*
getBaseSkeletons(UErrorCode & status) const1930 DateTimePatternGenerator::getBaseSkeletons(UErrorCode& status) const {
1931     if (U_FAILURE(status)) {
1932         return nullptr;
1933     }
1934     if (U_FAILURE(internalErrorCode)) {
1935         status = internalErrorCode;
1936         return nullptr;
1937     }
1938     LocalPointer<StringEnumeration> baseSkeletonEnumerator(
1939         new DTSkeletonEnumeration(*patternMap, DT_BASESKELETON, status), status);
1940 
1941     return U_SUCCESS(status) ? baseSkeletonEnumerator.orphan() : nullptr;
1942 }
1943 
1944 StringEnumeration*
getRedundants(UErrorCode & status)1945 DateTimePatternGenerator::getRedundants(UErrorCode& status) {
1946     if (U_FAILURE(status)) { return nullptr; }
1947     if (U_FAILURE(internalErrorCode)) {
1948         status = internalErrorCode;
1949         return nullptr;
1950     }
1951     LocalPointer<StringEnumeration> output(new DTRedundantEnumeration(), status);
1952     if (U_FAILURE(status)) { return nullptr; }
1953     const UnicodeString *pattern;
1954     PatternMapIterator it(status);
1955     if (U_FAILURE(status)) { return nullptr; }
1956 
1957     for (it.set(*patternMap); it.hasNext(); ) {
1958         DateTimeMatcher current = it.next();
1959         pattern = patternMap->getPatternFromSkeleton(*(it.getSkeleton()));
1960         if ( isCanonicalItem(*pattern) ) {
1961             continue;
1962         }
1963         if ( skipMatcher == nullptr ) {
1964             skipMatcher = new DateTimeMatcher(current);
1965             if (skipMatcher == nullptr) {
1966                 status = U_MEMORY_ALLOCATION_ERROR;
1967                 return nullptr;
1968             }
1969         }
1970         else {
1971             *skipMatcher = current;
1972         }
1973         UnicodeString trial = getBestPattern(current.getPattern(), status);
1974         if (U_FAILURE(status)) { return nullptr; }
1975         if (trial == *pattern) {
1976             ((DTRedundantEnumeration *)output.getAlias())->add(*pattern, status);
1977             if (U_FAILURE(status)) { return nullptr; }
1978         }
1979         if (current.equals(skipMatcher)) {
1980             continue;
1981         }
1982     }
1983     return output.orphan();
1984 }
1985 
1986 UBool
isCanonicalItem(const UnicodeString & item) const1987 DateTimePatternGenerator::isCanonicalItem(const UnicodeString& item) const {
1988     if ( item.length() != 1 ) {
1989         return false;
1990     }
1991     for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i) {
1992         if (item.charAt(0)==Canonical_Items[i]) {
1993             return true;
1994         }
1995     }
1996     return false;
1997 }
1998 
1999 
2000 DateTimePatternGenerator*
clone() const2001 DateTimePatternGenerator::clone() const {
2002     return new DateTimePatternGenerator(*this);
2003 }
2004 
PatternMap()2005 PatternMap::PatternMap() {
2006    for (int32_t i=0; i < MAX_PATTERN_ENTRIES; ++i ) {
2007        boot[i] = nullptr;
2008    }
2009    isDupAllowed = true;
2010 }
2011 
2012 void
copyFrom(const PatternMap & other,UErrorCode & status)2013 PatternMap::copyFrom(const PatternMap& other, UErrorCode& status) {
2014     if (U_FAILURE(status)) {
2015         return;
2016     }
2017     this->isDupAllowed = other.isDupAllowed;
2018     for (int32_t bootIndex = 0; bootIndex < MAX_PATTERN_ENTRIES; ++bootIndex) {
2019         PtnElem *curElem, *otherElem, *prevElem=nullptr;
2020         otherElem = other.boot[bootIndex];
2021         while (otherElem != nullptr) {
2022             LocalPointer<PtnElem> newElem(new PtnElem(otherElem->basePattern, otherElem->pattern), status);
2023             if (U_FAILURE(status)) {
2024                 return; // out of memory
2025             }
2026             newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(*(otherElem->skeleton)), status);
2027             if (U_FAILURE(status)) {
2028                 return; // out of memory
2029             }
2030             newElem->skeletonWasSpecified = otherElem->skeletonWasSpecified;
2031 
2032             // Release ownership from the LocalPointer of the PtnElem object.
2033             // The PtnElem will now be owned by either the boot (for the first entry in the linked-list)
2034             // or owned by the previous PtnElem object in the linked-list.
2035             curElem = newElem.orphan();
2036 
2037             if (this->boot[bootIndex] == nullptr) {
2038                 this->boot[bootIndex] = curElem;
2039             } else {
2040                 if (prevElem != nullptr) {
2041                     prevElem->next.adoptInstead(curElem);
2042                 } else {
2043                     UPRV_UNREACHABLE_EXIT;
2044                 }
2045             }
2046             prevElem = curElem;
2047             otherElem = otherElem->next.getAlias();
2048         }
2049 
2050     }
2051 }
2052 
2053 PtnElem*
getHeader(UChar baseChar) const2054 PatternMap::getHeader(UChar baseChar) const {
2055     PtnElem* curElem;
2056 
2057     if ( (baseChar >= CAP_A) && (baseChar <= CAP_Z) ) {
2058          curElem = boot[baseChar-CAP_A];
2059     }
2060     else {
2061         if ( (baseChar >=LOW_A) && (baseChar <= LOW_Z) ) {
2062             curElem = boot[26+baseChar-LOW_A];
2063         }
2064         else {
2065             return nullptr;
2066         }
2067     }
2068     return curElem;
2069 }
2070 
~PatternMap()2071 PatternMap::~PatternMap() {
2072    for (int32_t i=0; i < MAX_PATTERN_ENTRIES; ++i ) {
2073        if (boot[i] != nullptr ) {
2074            delete boot[i];
2075            boot[i] = nullptr;
2076        }
2077    }
2078 }  // PatternMap destructor
2079 
2080 void
add(const UnicodeString & basePattern,const PtnSkeleton & skeleton,const UnicodeString & value,UBool skeletonWasSpecified,UErrorCode & status)2081 PatternMap::add(const UnicodeString& basePattern,
2082                 const PtnSkeleton& skeleton,
2083                 const UnicodeString& value,// mapped pattern value
2084                 UBool skeletonWasSpecified,
2085                 UErrorCode &status) {
2086     UChar baseChar = basePattern.charAt(0);
2087     PtnElem *curElem, *baseElem;
2088     status = U_ZERO_ERROR;
2089 
2090     // the baseChar must be A-Z or a-z
2091     if ((baseChar >= CAP_A) && (baseChar <= CAP_Z)) {
2092         baseElem = boot[baseChar-CAP_A];
2093     }
2094     else {
2095         if ((baseChar >=LOW_A) && (baseChar <= LOW_Z)) {
2096             baseElem = boot[26+baseChar-LOW_A];
2097          }
2098          else {
2099              status = U_ILLEGAL_CHARACTER;
2100              return;
2101          }
2102     }
2103 
2104     if (baseElem == nullptr) {
2105         LocalPointer<PtnElem> newElem(new PtnElem(basePattern, value), status);
2106         if (U_FAILURE(status)) {
2107             return; // out of memory
2108         }
2109         newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(skeleton), status);
2110         if (U_FAILURE(status)) {
2111             return; // out of memory
2112         }
2113         newElem->skeletonWasSpecified = skeletonWasSpecified;
2114         if (baseChar >= LOW_A) {
2115             boot[26 + (baseChar - LOW_A)] = newElem.orphan(); // the boot array now owns the PtnElem.
2116         }
2117         else {
2118             boot[baseChar - CAP_A] = newElem.orphan(); // the boot array now owns the PtnElem.
2119         }
2120     }
2121     if ( baseElem != nullptr ) {
2122         curElem = getDuplicateElem(basePattern, skeleton, baseElem);
2123 
2124         if (curElem == nullptr) {
2125             // add new element to the list.
2126             curElem = baseElem;
2127             while( curElem -> next != nullptr )
2128             {
2129                 curElem = curElem->next.getAlias();
2130             }
2131 
2132             LocalPointer<PtnElem> newElem(new PtnElem(basePattern, value), status);
2133             if (U_FAILURE(status)) {
2134                 return; // out of memory
2135             }
2136             newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(skeleton), status);
2137             if (U_FAILURE(status)) {
2138                 return; // out of memory
2139             }
2140             newElem->skeletonWasSpecified = skeletonWasSpecified;
2141             curElem->next.adoptInstead(newElem.orphan());
2142             curElem = curElem->next.getAlias();
2143         }
2144         else {
2145             // Pattern exists in the list already.
2146             if ( !isDupAllowed ) {
2147                 return;
2148             }
2149             // Overwrite the value.
2150             curElem->pattern = value;
2151             // It was a bug that we were not doing the following previously,
2152             // though that bug hid other problems by making things partly work.
2153             curElem->skeletonWasSpecified = skeletonWasSpecified;
2154         }
2155     }
2156 }  // PatternMap::add
2157 
2158 // Find the pattern from the given basePattern string.
2159 const UnicodeString *
getPatternFromBasePattern(const UnicodeString & basePattern,UBool & skeletonWasSpecified) const2160 PatternMap::getPatternFromBasePattern(const UnicodeString& basePattern, UBool& skeletonWasSpecified) const { // key to search for
2161    PtnElem *curElem;
2162 
2163    if ((curElem=getHeader(basePattern.charAt(0)))==nullptr) {
2164        return nullptr;  // no match
2165    }
2166 
2167    do  {
2168        if ( basePattern.compare(curElem->basePattern)==0 ) {
2169           skeletonWasSpecified = curElem->skeletonWasSpecified;
2170           return &(curElem->pattern);
2171        }
2172        curElem = curElem->next.getAlias();
2173    } while (curElem != nullptr);
2174 
2175    return nullptr;
2176 }  // PatternMap::getFromBasePattern
2177 
2178 
2179 // Find the pattern from the given skeleton.
2180 // At least when this is called from getBestRaw & addPattern (in which case specifiedSkeletonPtr is non-NULL),
2181 // the comparison should be based on skeleton.original (which is unique and tied to the distance measurement in bestRaw)
2182 // and not skeleton.baseOriginal (which is not unique); otherwise we may pick a different skeleton than the one with the
2183 // optimum distance value in getBestRaw. When this is called from public getRedundants (specifiedSkeletonPtr is NULL),
2184 // for now it will continue to compare based on baseOriginal so as not to change the behavior unnecessarily.
2185 const UnicodeString *
getPatternFromSkeleton(const PtnSkeleton & skeleton,const PtnSkeleton ** specifiedSkeletonPtr) const2186 PatternMap::getPatternFromSkeleton(const PtnSkeleton& skeleton, const PtnSkeleton** specifiedSkeletonPtr) const { // key to search for
2187    PtnElem *curElem;
2188 
2189    if (specifiedSkeletonPtr) {
2190        *specifiedSkeletonPtr = nullptr;
2191    }
2192 
2193    // find boot entry
2194    UChar baseChar = skeleton.getFirstChar();
2195    if ((curElem=getHeader(baseChar))==nullptr) {
2196        return nullptr;  // no match
2197    }
2198 
2199    do  {
2200        UBool equal;
2201        if (specifiedSkeletonPtr != nullptr) { // called from DateTimePatternGenerator::getBestRaw or addPattern, use original
2202            equal = curElem->skeleton->original == skeleton.original;
2203        } else { // called from DateTimePatternGenerator::getRedundants, use baseOriginal
2204            equal = curElem->skeleton->baseOriginal == skeleton.baseOriginal;
2205        }
2206        if (equal) {
2207            if (specifiedSkeletonPtr && curElem->skeletonWasSpecified) {
2208                *specifiedSkeletonPtr = curElem->skeleton.getAlias();
2209            }
2210            return &(curElem->pattern);
2211        }
2212        curElem = curElem->next.getAlias();
2213    } while (curElem != nullptr);
2214 
2215    return nullptr;
2216 }
2217 
2218 UBool
equals(const PatternMap & other) const2219 PatternMap::equals(const PatternMap& other) const {
2220     if ( this==&other ) {
2221         return true;
2222     }
2223     for (int32_t bootIndex = 0; bootIndex < MAX_PATTERN_ENTRIES; ++bootIndex) {
2224         if (boot[bootIndex] == other.boot[bootIndex]) {
2225             continue;
2226         }
2227         if ((boot[bootIndex] == nullptr) || (other.boot[bootIndex] == nullptr)) {
2228             return false;
2229         }
2230         PtnElem *otherElem = other.boot[bootIndex];
2231         PtnElem *myElem = boot[bootIndex];
2232         while ((otherElem != nullptr) || (myElem != nullptr)) {
2233             if ( myElem == otherElem ) {
2234                 break;
2235             }
2236             if ((otherElem == nullptr) || (myElem == nullptr)) {
2237                 return false;
2238             }
2239             if ( (myElem->basePattern != otherElem->basePattern) ||
2240                  (myElem->pattern != otherElem->pattern) ) {
2241                 return false;
2242             }
2243             if ((myElem->skeleton.getAlias() != otherElem->skeleton.getAlias()) &&
2244                 !myElem->skeleton->equals(*(otherElem->skeleton))) {
2245                 return false;
2246             }
2247             myElem = myElem->next.getAlias();
2248             otherElem = otherElem->next.getAlias();
2249         }
2250     }
2251     return true;
2252 }
2253 
2254 // find any key existing in the mapping table already.
2255 // return true if there is an existing key, otherwise return false.
2256 PtnElem*
getDuplicateElem(const UnicodeString & basePattern,const PtnSkeleton & skeleton,PtnElem * baseElem)2257 PatternMap::getDuplicateElem(
2258             const UnicodeString &basePattern,
2259             const PtnSkeleton &skeleton,
2260             PtnElem *baseElem) {
2261    PtnElem *curElem;
2262 
2263    if ( baseElem == nullptr ) {
2264          return nullptr;
2265    }
2266    else {
2267          curElem = baseElem;
2268    }
2269    do {
2270      if ( basePattern.compare(curElem->basePattern)==0 ) {
2271          UBool isEqual = true;
2272          for (int32_t i = 0; i < UDATPG_FIELD_COUNT; ++i) {
2273             if (curElem->skeleton->type[i] != skeleton.type[i] ) {
2274                 isEqual = false;
2275                 break;
2276             }
2277         }
2278         if (isEqual) {
2279             return curElem;
2280         }
2281      }
2282      curElem = curElem->next.getAlias();
2283    } while( curElem != nullptr );
2284 
2285    // end of the list
2286    return nullptr;
2287 
2288 }  // PatternMap::getDuplicateElem
2289 
DateTimeMatcher(void)2290 DateTimeMatcher::DateTimeMatcher(void) {
2291 }
2292 
~DateTimeMatcher()2293 DateTimeMatcher::~DateTimeMatcher() {}
2294 
// Copy constructor: duplicates the other matcher's skeleton.
DateTimeMatcher::DateTimeMatcher(const DateTimeMatcher& other) {
    skeleton.copyFrom(other.skeleton);
}
2298 
// Copy assignment: replaces this matcher's skeleton with a copy of other's.
// Self-assignment is a no-op; without the guard PtnSkeleton::copyFrom would
// uprv_memcpy a buffer onto itself.
//
// @return *this
DateTimeMatcher& DateTimeMatcher::operator=(const DateTimeMatcher& other) {
    if (this != &other) {
        copyFrom(other.skeleton);
    }
    return *this;
}
2303 
2304 
2305 void
set(const UnicodeString & pattern,FormatParser * fp)2306 DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp) {
2307     PtnSkeleton localSkeleton;
2308     return set(pattern, fp, localSkeleton);
2309 }
2310 
2311 void
set(const UnicodeString & pattern,FormatParser * fp,PtnSkeleton & skeletonResult)2312 DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton& skeletonResult) {
2313     int32_t i;
2314     for (i=0; i<UDATPG_FIELD_COUNT; ++i) {
2315         skeletonResult.type[i] = NONE;
2316     }
2317     skeletonResult.original.clear();
2318     skeletonResult.baseOriginal.clear();
2319     skeletonResult.addedDefaultDayPeriod = false;
2320 
2321     fp->set(pattern);
2322     for (i=0; i < fp->itemNumber; i++) {
2323         const UnicodeString& value = fp->items[i];
2324         // don't skip 'a' anymore, dayPeriod handled specially below
2325 
2326         if ( fp->isQuoteLiteral(value) ) {
2327             UnicodeString quoteLiteral;
2328             fp->getQuoteLiteral(quoteLiteral, &i);
2329             continue;
2330         }
2331         int32_t canonicalIndex = fp->getCanonicalIndex(value);
2332         if (canonicalIndex < 0) {
2333             continue;
2334         }
2335         const dtTypeElem *row = &dtTypes[canonicalIndex];
2336         int32_t field = row->field;
2337         skeletonResult.original.populate(field, value);
2338         UChar repeatChar = row->patternChar;
2339         int32_t repeatCount = row->minLen;
2340         skeletonResult.baseOriginal.populate(field, repeatChar, repeatCount);
2341         int16_t subField = row->type;
2342         if (row->type > 0) {
2343             U_ASSERT(value.length() < INT16_MAX);
2344             subField += static_cast<int16_t>(value.length());
2345         }
2346         skeletonResult.type[field] = subField;
2347     }
2348 
2349     // #20739, we have a skeleton with minutes and milliseconds, but no seconds
2350     //
2351     // Theoretically we would need to check and fix all fields with "gaps":
2352     // for example year-day (no month), month-hour (no day), and so on, All the possible field combinations.
2353     // Plus some smartness: year + hour => should we add month, or add day-of-year?
2354     // What about month + day-of-week, or month + am/pm indicator.
2355     // I think beyond a certain point we should not try to fix bad developer input and try guessing what they mean.
2356     // Garbage in, garbage out.
2357     if (!skeletonResult.original.isFieldEmpty(UDATPG_MINUTE_FIELD)
2358         && !skeletonResult.original.isFieldEmpty(UDATPG_FRACTIONAL_SECOND_FIELD)
2359         && skeletonResult.original.isFieldEmpty(UDATPG_SECOND_FIELD)) {
2360         // Force the use of seconds
2361         for (i = 0; dtTypes[i].patternChar != 0; i++) {
2362             if (dtTypes[i].field == UDATPG_SECOND_FIELD) {
2363                 // first entry for UDATPG_SECOND_FIELD
2364                 skeletonResult.original.populate(UDATPG_SECOND_FIELD, dtTypes[i].patternChar, dtTypes[i].minLen);
2365                 skeletonResult.baseOriginal.populate(UDATPG_SECOND_FIELD, dtTypes[i].patternChar, dtTypes[i].minLen);
2366                 // We add value.length, same as above, when type is first initialized.
2367                 // The value we want to "fake" here is "s", and 1 means "s".length()
2368                 int16_t subField = dtTypes[i].type;
2369                 skeletonResult.type[UDATPG_SECOND_FIELD] = (subField > 0) ? subField + 1 : subField;
2370                 break;
2371             }
2372         }
2373     }
2374 
2375     // #13183, handle special behavior for day period characters (a, b, B)
2376     if (!skeletonResult.original.isFieldEmpty(UDATPG_HOUR_FIELD)) {
2377         if (skeletonResult.original.getFieldChar(UDATPG_HOUR_FIELD)==LOW_H || skeletonResult.original.getFieldChar(UDATPG_HOUR_FIELD)==CAP_K) {
2378             // We have a skeleton with 12-hour-cycle format
2379             if (skeletonResult.original.isFieldEmpty(UDATPG_DAYPERIOD_FIELD)) {
2380                 // But we do not have a day period in the skeleton; add the default DAYPERIOD (currently "a")
2381                 for (i = 0; dtTypes[i].patternChar != 0; i++) {
2382                     if ( dtTypes[i].field == UDATPG_DAYPERIOD_FIELD ) {
2383                         // first entry for UDATPG_DAYPERIOD_FIELD
2384                         skeletonResult.original.populate(UDATPG_DAYPERIOD_FIELD, dtTypes[i].patternChar, dtTypes[i].minLen);
2385                         skeletonResult.baseOriginal.populate(UDATPG_DAYPERIOD_FIELD, dtTypes[i].patternChar, dtTypes[i].minLen);
2386                         skeletonResult.type[UDATPG_DAYPERIOD_FIELD] = dtTypes[i].type;
2387                         skeletonResult.addedDefaultDayPeriod = true;
2388                         break;
2389                     }
2390                 }
2391             }
2392         } else {
2393             // Skeleton has 24-hour-cycle hour format and has dayPeriod, delete dayPeriod (i.e. ignore it)
2394             skeletonResult.original.clearField(UDATPG_DAYPERIOD_FIELD);
2395             skeletonResult.baseOriginal.clearField(UDATPG_DAYPERIOD_FIELD);
2396             skeletonResult.type[UDATPG_DAYPERIOD_FIELD] = NONE;
2397         }
2398     }
2399     copyFrom(skeletonResult);
2400 }
2401 
2402 void
getBasePattern(UnicodeString & result)2403 DateTimeMatcher::getBasePattern(UnicodeString &result ) {
2404     result.remove(); // Reset the result first.
2405     skeleton.baseOriginal.appendTo(result);
2406 }
2407 
2408 UnicodeString
getPattern()2409 DateTimeMatcher::getPattern() {
2410     UnicodeString result;
2411     return skeleton.original.appendTo(result);
2412 }
2413 
2414 int32_t
getDistance(const DateTimeMatcher & other,int32_t includeMask,DistanceInfo & distanceInfo) const2415 DateTimeMatcher::getDistance(const DateTimeMatcher& other, int32_t includeMask, DistanceInfo& distanceInfo) const {
2416     int32_t result = 0;
2417     distanceInfo.clear();
2418     for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i ) {
2419         int32_t myType = (includeMask&(1<<i))==0 ? 0 : skeleton.type[i];
2420         int32_t otherType = other.skeleton.type[i];
2421         if (myType==otherType) {
2422             continue;
2423         }
2424         if (myType==0) {// and other is not
2425             result += EXTRA_FIELD;
2426             distanceInfo.addExtra(i);
2427         }
2428         else {
2429             if (otherType==0) {
2430                 result += MISSING_FIELD;
2431                 distanceInfo.addMissing(i);
2432             }
2433             else {
2434                 result += abs(myType - otherType);
2435             }
2436         }
2437 
2438     }
2439     return result;
2440 }
2441 
2442 void
copyFrom(const PtnSkeleton & newSkeleton)2443 DateTimeMatcher::copyFrom(const PtnSkeleton& newSkeleton) {
2444     skeleton.copyFrom(newSkeleton);
2445 }
2446 
2447 void
copyFrom()2448 DateTimeMatcher::copyFrom() {
2449     // same as clear
2450     skeleton.clear();
2451 }
2452 
2453 UBool
equals(const DateTimeMatcher * other) const2454 DateTimeMatcher::equals(const DateTimeMatcher* other) const {
2455     if (other==nullptr) { return false; }
2456     return skeleton.original == other->skeleton.original;
2457 }
2458 
2459 int32_t
getFieldMask() const2460 DateTimeMatcher::getFieldMask() const {
2461     int32_t result = 0;
2462 
2463     for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i) {
2464         if (skeleton.type[i]!=0) {
2465             result |= (1<<i);
2466         }
2467     }
2468     return result;
2469 }
2470 
2471 PtnSkeleton*
getSkeletonPtr()2472 DateTimeMatcher::getSkeletonPtr() {
2473     return &skeleton;
2474 }
2475 
FormatParser()2476 FormatParser::FormatParser () {
2477     status = START;
2478     itemNumber = 0;
2479 }
2480 
2481 
~FormatParser()2482 FormatParser::~FormatParser () {
2483 }
2484 
2485 
2486 // Find the next token with the starting position and length
2487 // Note: the startPos may
2488 FormatParser::TokenStatus
setTokens(const UnicodeString & pattern,int32_t startPos,int32_t * len)2489 FormatParser::setTokens(const UnicodeString& pattern, int32_t startPos, int32_t *len) {
2490     int32_t curLoc = startPos;
2491     if ( curLoc >= pattern.length()) {
2492         return DONE;
2493     }
2494     // check the current char is between A-Z or a-z
2495     do {
2496         UChar c=pattern.charAt(curLoc);
2497         if ( (c>=CAP_A && c<=CAP_Z) || (c>=LOW_A && c<=LOW_Z) ) {
2498            curLoc++;
2499         }
2500         else {
2501                startPos = curLoc;
2502                *len=1;
2503                return ADD_TOKEN;
2504         }
2505 
2506         if ( pattern.charAt(curLoc)!= pattern.charAt(startPos) ) {
2507             break;  // not the same token
2508         }
2509     } while(curLoc <= pattern.length());
2510     *len = curLoc-startPos;
2511     return ADD_TOKEN;
2512 }
2513 
2514 void
set(const UnicodeString & pattern)2515 FormatParser::set(const UnicodeString& pattern) {
2516     int32_t startPos = 0;
2517     TokenStatus result = START;
2518     int32_t len = 0;
2519     itemNumber = 0;
2520 
2521     do {
2522         result = setTokens( pattern, startPos, &len );
2523         if ( result == ADD_TOKEN )
2524         {
2525             items[itemNumber++] = UnicodeString(pattern, startPos, len );
2526             startPos += len;
2527         }
2528         else {
2529             break;
2530         }
2531     } while (result==ADD_TOKEN && itemNumber < MAX_DT_TOKEN);
2532 }
2533 
2534 int32_t
getCanonicalIndex(const UnicodeString & s,UBool strict)2535 FormatParser::getCanonicalIndex(const UnicodeString& s, UBool strict) {
2536     int32_t len = s.length();
2537     if (len == 0) {
2538         return -1;
2539     }
2540     UChar ch = s.charAt(0);
2541 
2542     // Verify that all are the same character.
2543     for (int32_t l = 1; l < len; l++) {
2544         if (ch != s.charAt(l)) {
2545             return -1;
2546         }
2547     }
2548     int32_t i = 0;
2549     int32_t bestRow = -1;
2550     while (dtTypes[i].patternChar != 0x0000) {
2551         if ( dtTypes[i].patternChar != ch ) {
2552             ++i;
2553             continue;
2554         }
2555         bestRow = i;
2556         if (dtTypes[i].patternChar != dtTypes[i+1].patternChar) {
2557             return i;
2558         }
2559         if (dtTypes[i+1].minLen <= len) {
2560             ++i;
2561             continue;
2562         }
2563         return i;
2564     }
2565     return strict ? -1 : bestRow;
2566 }
2567 
2568 UBool
isQuoteLiteral(const UnicodeString & s)2569 FormatParser::isQuoteLiteral(const UnicodeString& s) {
2570     return (UBool)(s.charAt(0) == SINGLE_QUOTE);
2571 }
2572 
2573 // This function assumes the current itemIndex points to the quote literal.
2574 // Please call isQuoteLiteral prior to this function.
2575 void
getQuoteLiteral(UnicodeString & quote,int32_t * itemIndex)2576 FormatParser::getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex) {
2577     int32_t i = *itemIndex;
2578 
2579     quote.remove();
2580     if (items[i].charAt(0)==SINGLE_QUOTE) {
2581         quote += items[i];
2582         ++i;
2583     }
2584     while ( i < itemNumber ) {
2585         if ( items[i].charAt(0)==SINGLE_QUOTE ) {
2586             if ( (i+1<itemNumber) && (items[i+1].charAt(0)==SINGLE_QUOTE)) {
2587                 // two single quotes e.g. 'o''clock'
2588                 quote += items[i++];
2589                 quote += items[i++];
2590                 continue;
2591             }
2592             else {
2593                 quote += items[i];
2594                 break;
2595             }
2596         }
2597         else {
2598             quote += items[i];
2599         }
2600         ++i;
2601     }
2602     *itemIndex=i;
2603 }
2604 
2605 UBool
isPatternSeparator(const UnicodeString & field) const2606 FormatParser::isPatternSeparator(const UnicodeString& field) const {
2607     for (int32_t i=0; i<field.length(); ++i ) {
2608         UChar c= field.charAt(i);
2609         if ( (c==SINGLE_QUOTE) || (c==BACKSLASH) || (c==SPACE) || (c==COLON) ||
2610              (c==QUOTATION_MARK) || (c==COMMA) || (c==HYPHEN) ||(items[i].charAt(0)==DOT) ) {
2611             continue;
2612         }
2613         else {
2614             return false;
2615         }
2616     }
2617     return true;
2618 }
2619 
~DistanceInfo()2620 DistanceInfo::~DistanceInfo() {}
2621 
2622 void
setTo(const DistanceInfo & other)2623 DistanceInfo::setTo(const DistanceInfo& other) {
2624     missingFieldMask = other.missingFieldMask;
2625     extraFieldMask= other.extraFieldMask;
2626 }
2627 
PatternMapIterator(UErrorCode & status)2628 PatternMapIterator::PatternMapIterator(UErrorCode& status) :
2629     bootIndex(0), nodePtr(nullptr), matcher(nullptr), patternMap(nullptr)
2630 {
2631     if (U_FAILURE(status)) { return; }
2632     matcher.adoptInsteadAndCheckErrorCode(new DateTimeMatcher(), status);
2633 }
2634 
~PatternMapIterator()2635 PatternMapIterator::~PatternMapIterator() {
2636 }
2637 
2638 void
set(PatternMap & newPatternMap)2639 PatternMapIterator::set(PatternMap& newPatternMap) {
2640     this->patternMap=&newPatternMap;
2641 }
2642 
2643 PtnSkeleton*
getSkeleton() const2644 PatternMapIterator::getSkeleton() const {
2645     if ( nodePtr == nullptr ) {
2646         return nullptr;
2647     }
2648     else {
2649         return nodePtr->skeleton.getAlias();
2650     }
2651 }
2652 
2653 UBool
hasNext() const2654 PatternMapIterator::hasNext() const {
2655     int32_t headIndex = bootIndex;
2656     PtnElem *curPtr = nodePtr;
2657 
2658     if (patternMap==nullptr) {
2659         return false;
2660     }
2661     while ( headIndex < MAX_PATTERN_ENTRIES ) {
2662         if ( curPtr != nullptr ) {
2663             if ( curPtr->next != nullptr ) {
2664                 return true;
2665             }
2666             else {
2667                 headIndex++;
2668                 curPtr=nullptr;
2669                 continue;
2670             }
2671         }
2672         else {
2673             if ( patternMap->boot[headIndex] != nullptr ) {
2674                 return true;
2675             }
2676             else {
2677                 headIndex++;
2678                 continue;
2679             }
2680         }
2681     }
2682     return false;
2683 }
2684 
2685 DateTimeMatcher&
next()2686 PatternMapIterator::next() {
2687     while ( bootIndex < MAX_PATTERN_ENTRIES ) {
2688         if ( nodePtr != nullptr ) {
2689             if ( nodePtr->next != nullptr ) {
2690                 nodePtr = nodePtr->next.getAlias();
2691                 break;
2692             }
2693             else {
2694                 bootIndex++;
2695                 nodePtr=nullptr;
2696                 continue;
2697             }
2698         }
2699         else {
2700             if ( patternMap->boot[bootIndex] != nullptr ) {
2701                 nodePtr = patternMap->boot[bootIndex];
2702                 break;
2703             }
2704             else {
2705                 bootIndex++;
2706                 continue;
2707             }
2708         }
2709     }
2710     if (nodePtr!=nullptr) {
2711         matcher->copyFrom(*nodePtr->skeleton);
2712     }
2713     else {
2714         matcher->copyFrom();
2715     }
2716     return *matcher;
2717 }
2718 
2719 
SkeletonFields()2720 SkeletonFields::SkeletonFields() {
2721     // Set initial values to zero
2722     clear();
2723 }
2724 
clear()2725 void SkeletonFields::clear() {
2726     uprv_memset(chars, 0, sizeof(chars));
2727     uprv_memset(lengths, 0, sizeof(lengths));
2728 }
2729 
copyFrom(const SkeletonFields & other)2730 void SkeletonFields::copyFrom(const SkeletonFields& other) {
2731     uprv_memcpy(chars, other.chars, sizeof(chars));
2732     uprv_memcpy(lengths, other.lengths, sizeof(lengths));
2733 }
2734 
clearField(int32_t field)2735 void SkeletonFields::clearField(int32_t field) {
2736     chars[field] = 0;
2737     lengths[field] = 0;
2738 }
2739 
getFieldChar(int32_t field) const2740 UChar SkeletonFields::getFieldChar(int32_t field) const {
2741     return chars[field];
2742 }
2743 
getFieldLength(int32_t field) const2744 int32_t SkeletonFields::getFieldLength(int32_t field) const {
2745     return lengths[field];
2746 }
2747 
populate(int32_t field,const UnicodeString & value)2748 void SkeletonFields::populate(int32_t field, const UnicodeString& value) {
2749     populate(field, value.charAt(0), value.length());
2750 }
2751 
populate(int32_t field,UChar ch,int32_t length)2752 void SkeletonFields::populate(int32_t field, UChar ch, int32_t length) {
2753     chars[field] = (int8_t) ch;
2754     lengths[field] = (int8_t) length;
2755 }
2756 
isFieldEmpty(int32_t field) const2757 UBool SkeletonFields::isFieldEmpty(int32_t field) const {
2758     return lengths[field] == 0;
2759 }
2760 
appendTo(UnicodeString & string) const2761 UnicodeString& SkeletonFields::appendTo(UnicodeString& string) const {
2762     for (int32_t i = 0; i < UDATPG_FIELD_COUNT; ++i) {
2763         appendFieldTo(i, string);
2764     }
2765     return string;
2766 }
2767 
appendFieldTo(int32_t field,UnicodeString & string) const2768 UnicodeString& SkeletonFields::appendFieldTo(int32_t field, UnicodeString& string) const {
2769     UChar ch(chars[field]);
2770     int32_t length = (int32_t) lengths[field];
2771 
2772     for (int32_t i=0; i<length; i++) {
2773         string += ch;
2774     }
2775     return string;
2776 }
2777 
getFirstChar() const2778 UChar SkeletonFields::getFirstChar() const {
2779     for (int32_t i = 0; i < UDATPG_FIELD_COUNT; ++i) {
2780         if (lengths[i] != 0) {
2781             return chars[i];
2782         }
2783     }
2784     return '\0';
2785 }
2786 
2787 
PtnSkeleton()2788 PtnSkeleton::PtnSkeleton()
2789     : addedDefaultDayPeriod(false) {
2790 }
2791 
// Copy constructor: duplicates the type array, both field sets and the
// addedDefaultDayPeriod flag of `other`.
PtnSkeleton::PtnSkeleton(const PtnSkeleton& other) {
    uprv_memcpy(type, other.type, sizeof(type));
    original.copyFrom(other.original);
    baseOriginal.copyFrom(other.baseOriginal);
    addedDefaultDayPeriod = other.addedDefaultDayPeriod;
}
2795 
copyFrom(const PtnSkeleton & other)2796 void PtnSkeleton::copyFrom(const PtnSkeleton& other) {
2797     uprv_memcpy(type, other.type, sizeof(type));
2798     original.copyFrom(other.original);
2799     baseOriginal.copyFrom(other.baseOriginal);
2800     addedDefaultDayPeriod = other.addedDefaultDayPeriod;
2801 }
2802 
clear()2803 void PtnSkeleton::clear() {
2804     uprv_memset(type, 0, sizeof(type));
2805     original.clear();
2806     baseOriginal.clear();
2807 }
2808 
2809 UBool
equals(const PtnSkeleton & other) const2810 PtnSkeleton::equals(const PtnSkeleton& other) const  {
2811     return (original == other.original)
2812         && (baseOriginal == other.baseOriginal)
2813         && (uprv_memcmp(type, other.type, sizeof(type)) == 0);
2814 }
2815 
2816 UnicodeString
getSkeleton() const2817 PtnSkeleton::getSkeleton() const {
2818     UnicodeString result;
2819     result = original.appendTo(result);
2820     int32_t pos;
2821     if (addedDefaultDayPeriod && (pos = result.indexOf(LOW_A)) >= 0) {
2822         // for backward compatibility: if DateTimeMatcher.set added a single 'a' that
2823         // was not in the provided skeleton, remove it here before returning skeleton.
2824         result.remove(pos, 1);
2825     }
2826     return result;
2827 }
2828 
2829 UnicodeString
getBaseSkeleton() const2830 PtnSkeleton::getBaseSkeleton() const {
2831     UnicodeString result;
2832     result = baseOriginal.appendTo(result);
2833     int32_t pos;
2834     if (addedDefaultDayPeriod && (pos = result.indexOf(LOW_A)) >= 0) {
2835         // for backward compatibility: if DateTimeMatcher.set added a single 'a' that
2836         // was not in the provided skeleton, remove it here before returning skeleton.
2837         result.remove(pos, 1);
2838     }
2839     return result;
2840 }
2841 
2842 UChar
getFirstChar() const2843 PtnSkeleton::getFirstChar() const {
2844     return baseOriginal.getFirstChar();
2845 }
2846 
~PtnSkeleton()2847 PtnSkeleton::~PtnSkeleton() {
2848 }
2849 
PtnElem(const UnicodeString & basePat,const UnicodeString & pat)2850 PtnElem::PtnElem(const UnicodeString &basePat, const UnicodeString &pat) :
2851     basePattern(basePat), skeleton(nullptr), pattern(pat), next(nullptr)
2852 {
2853 }
2854 
~PtnElem()2855 PtnElem::~PtnElem() {
2856 }
2857 
DTSkeletonEnumeration(PatternMap & patternMap,dtStrEnum type,UErrorCode & status)2858 DTSkeletonEnumeration::DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status) : fSkeletons(nullptr) {
2859     PtnElem  *curElem;
2860     PtnSkeleton *curSkeleton;
2861     UnicodeString s;
2862     int32_t bootIndex;
2863 
2864     pos=0;
2865     fSkeletons.adoptInsteadAndCheckErrorCode(new UVector(status), status);
2866     if (U_FAILURE(status)) {
2867         return;
2868     }
2869 
2870     for (bootIndex=0; bootIndex<MAX_PATTERN_ENTRIES; ++bootIndex ) {
2871         curElem = patternMap.boot[bootIndex];
2872         while (curElem!=nullptr) {
2873             switch(type) {
2874                 case DT_BASESKELETON:
2875                     s=curElem->basePattern;
2876                     break;
2877                 case DT_PATTERN:
2878                     s=curElem->pattern;
2879                     break;
2880                 case DT_SKELETON:
2881                     curSkeleton=curElem->skeleton.getAlias();
2882                     s=curSkeleton->getSkeleton();
2883                     break;
2884             }
2885             if ( !isCanonicalItem(s) ) {
2886                 LocalPointer<UnicodeString> newElem(s.clone(), status);
2887                 if (U_FAILURE(status)) {
2888                     return;
2889                 }
2890                 fSkeletons->addElement(newElem.getAlias(), status);
2891                 if (U_FAILURE(status)) {
2892                     fSkeletons.adoptInstead(nullptr);
2893                     return;
2894                 }
2895                 newElem.orphan(); // fSkeletons vector now owns the UnicodeString (although it
2896                                   // does not use a deleter function to manage the ownership).
2897             }
2898             curElem = curElem->next.getAlias();
2899         }
2900     }
2901     if ((bootIndex==MAX_PATTERN_ENTRIES) && (curElem!=nullptr) ) {
2902         status = U_BUFFER_OVERFLOW_ERROR;
2903     }
2904 }
2905 
2906 const UnicodeString*
snext(UErrorCode & status)2907 DTSkeletonEnumeration::snext(UErrorCode& status) {
2908     if (U_SUCCESS(status) && fSkeletons.isValid() && pos < fSkeletons->size()) {
2909         return (const UnicodeString*)fSkeletons->elementAt(pos++);
2910     }
2911     return nullptr;
2912 }
2913 
2914 void
reset(UErrorCode &)2915 DTSkeletonEnumeration::reset(UErrorCode& /*status*/) {
2916     pos=0;
2917 }
2918 
2919 int32_t
count(UErrorCode &) const2920 DTSkeletonEnumeration::count(UErrorCode& /*status*/) const {
2921    return (fSkeletons.isNull()) ? 0 : fSkeletons->size();
2922 }
2923 
2924 UBool
isCanonicalItem(const UnicodeString & item)2925 DTSkeletonEnumeration::isCanonicalItem(const UnicodeString& item) {
2926     if ( item.length() != 1 ) {
2927         return false;
2928     }
2929     for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i) {
2930         if (item.charAt(0)==Canonical_Items[i]) {
2931             return true;
2932         }
2933     }
2934     return false;
2935 }
2936 
~DTSkeletonEnumeration()2937 DTSkeletonEnumeration::~DTSkeletonEnumeration() {
2938     UnicodeString *s;
2939     if (fSkeletons.isValid()) {
2940         for (int32_t i = 0; i < fSkeletons->size(); ++i) {
2941             if ((s = (UnicodeString *)fSkeletons->elementAt(i)) != nullptr) {
2942                 delete s;
2943             }
2944         }
2945     }
2946 }
2947 
DTRedundantEnumeration()2948 DTRedundantEnumeration::DTRedundantEnumeration() : pos(0), fPatterns(nullptr) {
2949 }
2950 
2951 void
add(const UnicodeString & pattern,UErrorCode & status)2952 DTRedundantEnumeration::add(const UnicodeString& pattern, UErrorCode& status) {
2953     if (U_FAILURE(status)) { return; }
2954     if (fPatterns.isNull())  {
2955         fPatterns.adoptInsteadAndCheckErrorCode(new UVector(status), status);
2956         if (U_FAILURE(status)) {
2957             return;
2958        }
2959     }
2960     LocalPointer<UnicodeString> newElem(new UnicodeString(pattern), status);
2961     if (U_FAILURE(status)) {
2962         return;
2963     }
2964     fPatterns->addElement(newElem.getAlias(), status);
2965     if (U_FAILURE(status)) {
2966         fPatterns.adoptInstead(nullptr);
2967         return;
2968     }
2969     newElem.orphan(); // fPatterns now owns the string, although a UVector
2970                       // deleter function is not used to manage that ownership.
2971 }
2972 
2973 const UnicodeString*
snext(UErrorCode & status)2974 DTRedundantEnumeration::snext(UErrorCode& status) {
2975     if (U_SUCCESS(status) && fPatterns.isValid() && pos < fPatterns->size()) {
2976         return (const UnicodeString*)fPatterns->elementAt(pos++);
2977     }
2978     return nullptr;
2979 }
2980 
2981 void
reset(UErrorCode &)2982 DTRedundantEnumeration::reset(UErrorCode& /*status*/) {
2983     pos=0;
2984 }
2985 
2986 int32_t
count(UErrorCode &) const2987 DTRedundantEnumeration::count(UErrorCode& /*status*/) const {
2988     return (fPatterns.isNull()) ? 0 : fPatterns->size();
2989 }
2990 
2991 UBool
isCanonicalItem(const UnicodeString & item) const2992 DTRedundantEnumeration::isCanonicalItem(const UnicodeString& item) const {
2993     if ( item.length() != 1 ) {
2994         return false;
2995     }
2996     for (int32_t i=0; i<UDATPG_FIELD_COUNT; ++i) {
2997         if (item.charAt(0)==Canonical_Items[i]) {
2998             return true;
2999         }
3000     }
3001     return false;
3002 }
3003 
~DTRedundantEnumeration()3004 DTRedundantEnumeration::~DTRedundantEnumeration() {
3005     UnicodeString *s;
3006     if (fPatterns.isValid()) {
3007         for (int32_t i = 0; i < fPatterns->size(); ++i) {
3008             if ((s = (UnicodeString *)fPatterns->elementAt(i)) != nullptr) {
3009                 delete s;
3010             }
3011         }
3012     }
3013 }
3014 
3015 U_NAMESPACE_END
3016 
3017 
3018 #endif /* #if !UCONFIG_NO_FORMATTING */
3019 
3020 //eof
3021