• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include "cintltst.h"
10 #include "unicode/ures.h"
11 #include "unicode/ucurr.h"
12 #include "unicode/ustring.h"
13 #include "unicode/uset.h"
14 #include "unicode/udat.h"
15 #include "unicode/uscript.h"
16 #include "unicode/ulocdata.h"
17 #include "unicode/utf16.h"
18 #include "cmemory.h"
19 #include "cstring.h"
20 #include "locmap.h"
21 #include "uresimp.h"
22 
23 /*
24 returns a new UnicodeSet that is a flattened form of the original
25 UnicodeSet.
26 */
27 static USet*
createFlattenSet(USet * origSet,UErrorCode * status)28 createFlattenSet(USet *origSet, UErrorCode *status) {
29 
30 
31     USet *newSet = NULL;
32     int32_t origItemCount = 0;
33     int32_t idx, graphmeSize;
34     UChar32 start, end;
35     UChar graphme[64];
36     if (U_FAILURE(*status)) {
37         log_err("createFlattenSet called with %s\n", u_errorName(*status));
38         return NULL;
39     }
40     newSet = uset_open(1, 0);
41     origItemCount = uset_getItemCount(origSet);
42     for (idx = 0; idx < origItemCount; idx++) {
43         graphmeSize = uset_getItem(origSet, idx,
44             &start, &end,
45             graphme, UPRV_LENGTHOF(graphme),
46             status);
47         if (U_FAILURE(*status)) {
48             log_err("ERROR: uset_getItem returned %s\n", u_errorName(*status));
49             *status = U_ZERO_ERROR;
50         }
51         if (graphmeSize) {
52             uset_addAllCodePoints(newSet, graphme, graphmeSize);
53         }
54         else {
55             uset_addRange(newSet, start, end);
56         }
57     }
58     uset_closeOver(newSet,USET_CASE_INSENSITIVE);
59     return newSet;
60 }
61 
62 static UBool
isCurrencyPreEuro(const char * currencyKey)63 isCurrencyPreEuro(const char* currencyKey){
64     if( strcmp(currencyKey, "PTE") == 0 ||
65         strcmp(currencyKey, "ESP") == 0 ||
66         strcmp(currencyKey, "LUF") == 0 ||
67         strcmp(currencyKey, "GRD") == 0 ||
68         strcmp(currencyKey, "BEF") == 0 ||
69         strcmp(currencyKey, "ITL") == 0 ||
70         strcmp(currencyKey, "EEK") == 0){
71             return TRUE;
72     }
73     return FALSE;
74 }
75 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
76 static void
TestKeyInRootRecursive(UResourceBundle * root,const char * rootName,UResourceBundle * currentBundle,const char * locale)77 TestKeyInRootRecursive(UResourceBundle *root, const char *rootName,
78                        UResourceBundle *currentBundle, const char *locale) {
79     UErrorCode errorCode = U_ZERO_ERROR;
80     UResourceBundle *subRootBundle = NULL, *subBundle = NULL, *arr = NULL;
81 
82     ures_resetIterator(root);
83     ures_resetIterator(currentBundle);
84     while (ures_hasNext(currentBundle)) {
85         const char *subBundleKey = NULL;
86         const char *currentBundleKey = NULL;
87 
88         errorCode = U_ZERO_ERROR;
89         currentBundleKey = ures_getKey(currentBundle);
90         (void)currentBundleKey;    /* Suppress set but not used warning. */
91         subBundle = ures_getNextResource(currentBundle, NULL, &errorCode);
92         if (U_FAILURE(errorCode)) {
93             log_err("Can't open a resource for lnocale %s. Error: %s\n", locale, u_errorName(errorCode));
94             continue;
95         }
96         subBundleKey = ures_getKey(subBundle);
97 
98 
99         subRootBundle = ures_getByKey(root, subBundleKey, NULL, &errorCode);
100         if (U_FAILURE(errorCode)) {
101             log_err("Can't open a resource with key \"%s\" in \"%s\" from %s for locale \"%s\"\n",
102                     subBundleKey,
103                     ures_getKey(currentBundle),
104                     rootName,
105                     locale);
106             ures_close(subBundle);
107             continue;
108         }
109         if (ures_getType(subRootBundle) != ures_getType(subBundle)) {
110             log_err("key \"%s\" in \"%s\" has a different type from root for locale \"%s\"\n"
111                     "\troot=%d, locale=%d\n",
112                     subBundleKey,
113                     ures_getKey(currentBundle),
114                     locale,
115                     ures_getType(subRootBundle),
116                     ures_getType(subBundle));
117             ures_close(subBundle);
118             continue;
119         }
120         else if (ures_getType(subBundle) == URES_INT_VECTOR) {
121             int32_t minSize;
122             int32_t subBundleSize;
123             int32_t idx;
124             UBool sameArray = TRUE;
125             const int32_t *subRootBundleArr = ures_getIntVector(subRootBundle, &minSize, &errorCode);
126             const int32_t *subBundleArr = ures_getIntVector(subBundle, &subBundleSize, &errorCode);
127 
128             if (minSize > subBundleSize) {
129                 minSize = subBundleSize;
130                 log_err("Arrays are different size with key \"%s\" in \"%s\" from root for locale \"%s\"\n",
131                         subBundleKey,
132                         ures_getKey(currentBundle),
133                         locale);
134             }
135 
136             for (idx = 0; idx < minSize && sameArray; idx++) {
137                 if (subRootBundleArr[idx] != subBundleArr[idx]) {
138                     sameArray = FALSE;
139                 }
140                 if (strcmp(subBundleKey, "DateTimeElements") == 0
141                     && (subBundleArr[idx] < 1 || 7 < subBundleArr[idx]))
142                 {
143                     log_err("Value out of range with key \"%s\" at index %d in \"%s\" for locale \"%s\"\n",
144                             subBundleKey,
145                             idx,
146                             ures_getKey(currentBundle),
147                             locale);
148                 }
149             }
150             /* Special exception es_US and DateTimeElements */
151             if (sameArray
152                 && !(strcmp(locale, "es_US") == 0 && strcmp(subBundleKey, "DateTimeElements") == 0))
153             {
154                 log_err("Integer vectors are the same with key \"%s\" in \"%s\" from root for locale \"%s\"\n",
155                         subBundleKey,
156                         ures_getKey(currentBundle),
157                         locale);
158             }
159         }
160         else if (ures_getType(subBundle) == URES_ARRAY) {
161             UResourceBundle *subSubBundle = ures_getByIndex(subBundle, 0, NULL, &errorCode);
162             UResourceBundle *subSubRootBundle = ures_getByIndex(subRootBundle, 0, NULL, &errorCode);
163 
164             if (U_SUCCESS(errorCode)
165                 && (ures_getType(subSubBundle) == URES_ARRAY || ures_getType(subSubRootBundle) == URES_ARRAY))
166             {
167                 /* Here is one of the recursive parts */
168                 TestKeyInRootRecursive(subRootBundle, rootName, subBundle, locale);
169             }
170             else {
171                 int32_t minSize = ures_getSize(subRootBundle);
172                 int32_t idx;
173                 UBool sameArray = TRUE;
174 
175                 if (minSize > ures_getSize(subBundle)) {
176                     minSize = ures_getSize(subBundle);
177                 }
178 
179                 if ((subBundleKey == NULL
180                     || (subBundleKey != NULL &&  strcmp(subBundleKey, "LocaleScript") != 0 && !isCurrencyPreEuro(subBundleKey)))
181                     && ures_getSize(subRootBundle) != ures_getSize(subBundle))
182                 {
183                     log_err("Different size array with key \"%s\" in \"%s\" from root for locale \"%s\"\n"
184                             "\troot array size=%d, locale array size=%d\n",
185                             subBundleKey,
186                             ures_getKey(currentBundle),
187                             locale,
188                             ures_getSize(subRootBundle),
189                             ures_getSize(subBundle));
190                 }
191                 /*
192                 if(isCurrencyPreEuro(subBundleKey) && ures_getSize(subBundle)!=3){
193                     log_err("Different size array with key \"%s\" in \"%s\" for locale \"%s\" the expected size is 3 got size=%d\n",
194                             subBundleKey,
195                             ures_getKey(currentBundle),
196                             locale,
197                             ures_getSize(subBundle));
198                 }
199                 */
200                 for (idx = 0; idx < minSize; idx++) {
201                     int32_t rootStrLen, localeStrLen;
202                     const UChar *rootStr = ures_getStringByIndex(subRootBundle,idx,&rootStrLen,&errorCode);
203                     const UChar *localeStr = ures_getStringByIndex(subBundle,idx,&localeStrLen,&errorCode);
204                     if (rootStr && localeStr && U_SUCCESS(errorCode)) {
205                         if (u_strcmp(rootStr, localeStr) != 0) {
206                             sameArray = FALSE;
207                         }
208                     }
209                     else {
210                         if ( rootStrLen > 1 && rootStr[0] == 0x41 && rootStr[1] >= 0x30 && rootStr[1] <= 0x39 ) {
211                            /* A2 or A4 in the root string indicates that the resource can optionally be an array instead of a */
212                            /* string.  Attempt to read it as an array. */
213                           errorCode = U_ZERO_ERROR;
214                           arr = ures_getByIndex(subBundle,idx,NULL,&errorCode);
215                           if (U_FAILURE(errorCode)) {
216                               log_err("Got a NULL string with key \"%s\" in \"%s\" at index %d for root or locale \"%s\"\n",
217                                       subBundleKey,
218                                       ures_getKey(currentBundle),
219                                       idx,
220                                       locale);
221                               continue;
222                           }
223                           if (ures_getType(arr) != URES_ARRAY || ures_getSize(arr) != (int32_t)rootStr[1] - 0x30) {
224                               log_err("Got something other than a string or array of size %d for key \"%s\" in \"%s\" at index %d for root or locale \"%s\"\n",
225                                       rootStr[1] - 0x30,
226                                       subBundleKey,
227                                       ures_getKey(currentBundle),
228                                       idx,
229                                       locale);
230                               ures_close(arr);
231                               continue;
232                           }
233                           localeStr = ures_getStringByIndex(arr,0,&localeStrLen,&errorCode);
234                           ures_close(arr);
235                           if (U_FAILURE(errorCode)) {
236                               log_err("Got something other than a string or array for key \"%s\" in \"%s\" at index %d for root or locale \"%s\"\n",
237                                       subBundleKey,
238                                       ures_getKey(currentBundle),
239                                       idx,
240                                       locale);
241                               continue;
242                           }
243                         } else {
244                             log_err("Got a NULL string with key \"%s\" in \"%s\" at index %d for root or locale \"%s\"\n",
245                                 subBundleKey,
246                                 ures_getKey(currentBundle),
247                                 idx,
248                                 locale);
249                             continue;
250                         }
251                     }
252                     if (localeStr[0] == (UChar)0x20) {
253                         log_err("key \"%s\" at index %d in \"%s\" starts with a space in locale \"%s\"\n",
254                                 subBundleKey,
255                                 idx,
256                                 ures_getKey(currentBundle),
257                                 locale);
258                     }
259                     else if ((localeStr[localeStrLen - 1] == (UChar)0x20) && (strcmp(subBundleKey,"separator") != 0)) {
260                         log_err("key \"%s\" at index %d in \"%s\" ends with a space in locale \"%s\"\n",
261                                 subBundleKey,
262                                 idx,
263                                 ures_getKey(currentBundle),
264                                 locale);
265                     }
266                     else if (subBundleKey != NULL
267                         && strcmp(subBundleKey, "DateTimePatterns") == 0)
268                     {
269                         int32_t quoted = 0;
270                         const UChar *localeStrItr = localeStr;
271                         while (*localeStrItr) {
272                             if (*localeStrItr == (UChar)0x27 /* ' */) {
273                                 quoted++;
274                             }
275                             else if ((quoted % 2) == 0) {
276                                 /* Search for unquoted characters */
277                                 if (4 <= idx && idx <= 7
278                                     && (*localeStrItr == (UChar)0x6B /* k */
279                                     || *localeStrItr == (UChar)0x48 /* H */
280                                     || *localeStrItr == (UChar)0x6D /* m */
281                                     || *localeStrItr == (UChar)0x73 /* s */
282                                     || *localeStrItr == (UChar)0x53 /* S */
283                                     || *localeStrItr == (UChar)0x61 /* a */
284                                     || *localeStrItr == (UChar)0x68 /* h */
285                                     || *localeStrItr == (UChar)0x7A /* z */))
286                                 {
287                                     log_err("key \"%s\" at index %d has time pattern chars in date for locale \"%s\"\n",
288                                             subBundleKey,
289                                             idx,
290                                             locale);
291                                 }
292                                 else if (0 <= idx && idx <= 3
293                                     && (*localeStrItr == (UChar)0x47 /* G */
294                                     || *localeStrItr == (UChar)0x79 /* y */
295                                     || *localeStrItr == (UChar)0x4D /* M */
296                                     || *localeStrItr == (UChar)0x64 /* d */
297                                     || *localeStrItr == (UChar)0x45 /* E */
298                                     || *localeStrItr == (UChar)0x44 /* D */
299                                     || *localeStrItr == (UChar)0x46 /* F */
300                                     || *localeStrItr == (UChar)0x77 /* w */
301                                     || *localeStrItr == (UChar)0x57 /* W */))
302                                 {
303                                     log_err("key \"%s\" at index %d has date pattern chars in time for locale \"%s\"\n",
304                                             subBundleKey,
305                                             idx,
306                                             locale);
307                                 }
308                             }
309                             localeStrItr++;
310                         }
311                     }
312                     else if (idx == 4 && subBundleKey != NULL
313                         && strcmp(subBundleKey, "NumberElements") == 0
314                         && u_charDigitValue(localeStr[0]) != 0)
315                     {
316                         log_err("key \"%s\" at index %d has a non-zero based number for locale \"%s\"\n",
317                                 subBundleKey,
318                                 idx,
319                                 locale);
320                     }
321                 }
322                 (void)sameArray;    /* Suppress set but not used warning. */
323 /*                if (sameArray && strcmp(rootName, "root") == 0) {
324                     log_err("Arrays are the same with key \"%s\" in \"%s\" from root for locale \"%s\"\n",
325                             subBundleKey,
326                             ures_getKey(currentBundle),
327                             locale);
328                 }*/
329             }
330             ures_close(subSubBundle);
331             ures_close(subSubRootBundle);
332         }
333         else if (ures_getType(subBundle) == URES_STRING) {
334             int32_t len = 0;
335             const UChar *string = ures_getString(subBundle, &len, &errorCode);
336             if (U_FAILURE(errorCode) || string == NULL) {
337                 log_err("Can't open a string with key \"%s\" in \"%s\" for locale \"%s\"\n",
338                         subBundleKey,
339                         ures_getKey(currentBundle),
340                         locale);
341             } else if (string[0] == (UChar)0x20) {
342                 log_err("key \"%s\" in \"%s\" starts with a space in locale \"%s\"\n",
343                         subBundleKey,
344                         ures_getKey(currentBundle),
345                         locale);
346             /* localeDisplayPattern/separator can end with a space */
347             } else if (string[len - 1] == (UChar)0x20 && (strcmp(subBundleKey,"separator"))) {
348                 log_err("key \"%s\" in \"%s\" ends with a space in locale \"%s\"\n",
349                         subBundleKey,
350                         ures_getKey(currentBundle),
351                         locale);
352             } else if (strcmp(subBundleKey, "localPatternChars") == 0) {
353                 /* Note: We no longer import localPatternChars data starting
354                  * ICU 3.8.  So it never comes into this else if block. (ticket#5597)
355                  */
356 
357                 /* Check well-formedness of localPatternChars.  First, the
358                  * length must match the number of fields defined by
359                  * DateFormat.  Second, each character in the string must
360                  * be in the set [A-Za-z].  Finally, each character must be
361                  * unique.
362                  */
363                 int32_t i,j;
364 #if !UCONFIG_NO_FORMATTING
365                 if (len != UDAT_FIELD_COUNT) {
366                     log_err("key \"%s\" has the wrong number of characters in locale \"%s\"\n",
367                             subBundleKey,
368                             locale);
369                 }
370 #endif
371                 /* Check char validity. */
372                 for (i=0; i<len; ++i) {
373                     if (!((string[i] >= 65/*'A'*/ && string[i] <= 90/*'Z'*/) ||
374                           (string[i] >= 97/*'a'*/ && string[i] <= 122/*'z'*/))) {
375                         log_err("key \"%s\" has illegal character '%c' in locale \"%s\"\n",
376                                 subBundleKey,
377                                 (char) string[i],
378                                 locale);
379                     }
380                     /* Do O(n^2) check for duplicate chars. */
381                     for (j=0; j<i; ++j) {
382                         if (string[j] == string[i]) {
383                             log_err("key \"%s\" has duplicate character '%c' in locale \"%s\"\n",
384                                     subBundleKey,
385                                     (char) string[i],
386                                     locale);
387                         }
388                     }
389                 }
390             }
391             /* No fallback was done. Check for duplicate data */
392             /* The ures_* API does not do fallback of sub-resource bundles,
393                So we can't do this now. */
394 #if 0
395             else if (strcmp(locale, "root") != 0 && errorCode == U_ZERO_ERROR) {
396 
397                 const UChar *rootString = ures_getString(subRootBundle, &len, &errorCode);
398                 if (U_FAILURE(errorCode) || rootString == NULL) {
399                     log_err("Can't open a string with key \"%s\" in \"%s\" in root\n",
400                             ures_getKey(subRootBundle),
401                             ures_getKey(currentBundle));
402                     continue;
403                 } else if (u_strcmp(string, rootString) == 0) {
404                     if (strcmp(locale, "de_CH") != 0 && strcmp(subBundleKey, "Countries") != 0 &&
405                         strcmp(subBundleKey, "Version") != 0) {
406                         log_err("Found duplicate data with key \"%s\" in \"%s\" in locale \"%s\"\n",
407                                 ures_getKey(subRootBundle),
408                                 ures_getKey(currentBundle),
409                                 locale);
410                     }
411                     else {
412                         /* Ignore for now. */
413                         /* Can be fixed if fallback through de locale was done. */
414                         log_verbose("Skipping key %s in %s\n", subBundleKey, locale);
415                     }
416                 }
417             }
418 #endif
419         }
420         else if (ures_getType(subBundle) == URES_TABLE) {
421             if (strcmp(subBundleKey, "availableFormats")!=0) {
422                 /* Here is one of the recursive parts */
423                 TestKeyInRootRecursive(subRootBundle, rootName, subBundle, locale);
424             }
425             else {
426                 log_verbose("Skipping key %s in %s\n", subBundleKey, locale);
427             }
428         }
429         else if (ures_getType(subBundle) == URES_BINARY || ures_getType(subBundle) == URES_INT) {
430             /* Can't do anything to check it */
431             /* We'll assume it's all correct */
432             if (strcmp(subBundleKey, "MeasurementSystem") != 0) {
433                 log_verbose("Skipping key \"%s\" in \"%s\" for locale \"%s\"\n",
434                         subBundleKey,
435                         ures_getKey(currentBundle),
436                         locale);
437             }
438             /* Testing for MeasurementSystem is done in VerifyTranslation */
439         }
440         else {
441             log_err("Type %d for key \"%s\" in \"%s\" is unknown for locale \"%s\"\n",
442                     ures_getType(subBundle),
443                     subBundleKey,
444                     ures_getKey(currentBundle),
445                     locale);
446         }
447         ures_close(subRootBundle);
448         ures_close(subBundle);
449     }
450 }
451 #endif
452 
453 static void
testLCID(UResourceBundle * currentBundle,const char * localeName)454 testLCID(UResourceBundle *currentBundle,
455          const char *localeName)
456 {
457     (void)currentBundle; // suppress compiler warnings about unused variables
458     UErrorCode status = U_ZERO_ERROR;
459     uint32_t expectedLCID;
460     char lcidStringC[64] = {0};
461     int32_t len;
462 
463     expectedLCID = uloc_getLCID(localeName);
464     if (expectedLCID == 0) {
465         log_verbose("INFO:    %-5s does not have any LCID mapping\n",
466             localeName);
467         return;
468     }
469 
470     status = U_ZERO_ERROR;
471     len = uprv_convertToPosix(expectedLCID, lcidStringC, UPRV_LENGTHOF(lcidStringC) - 1, &status);
472     if (U_FAILURE(status)) {
473         log_err("ERROR:   %.4x does not have a POSIX mapping due to %s\n",
474             expectedLCID, u_errorName(status));
475     }
476     lcidStringC[len] = 0;
477 
478     if(strcmp(localeName, lcidStringC) != 0) {
479         char langName[1024];
480         char langLCID[1024];
481         uloc_getLanguage(localeName, langName, sizeof(langName), &status);
482         uloc_getLanguage(lcidStringC, langLCID, sizeof(langLCID), &status);
483 
484         if (strcmp(langName, langLCID) == 0) {
485             log_verbose("WARNING: %-5s resolves to %s (0x%.4x)\n",
486                 localeName, lcidStringC, expectedLCID);
487         }
488         else if (!(strcmp(localeName, "ku") == 0 && log_knownIssue("20181", "ICU-20181 Fix LCID mapping for ckb vs ku"))) {
489             log_err("ERROR:   %-5s has 0x%.4x and the number resolves wrongfully to %s\n",
490                 localeName, expectedLCID, lcidStringC);
491         }
492     }
493 }
494 
495 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
496 static void
TestLocaleStructure(void)497 TestLocaleStructure(void) {
498     // This test checks the locale structure against a key file located
499     // at source/test/testdata/structLocale.txt. When adding new data to
500     // a locale file such as en.txt, the structLocale.txt file must be changed
501     // too to include the the template of the new data. Otherwise this test
502     // will fail!
503 
504     UResourceBundle *root, *currentLocale;
505     int32_t locCount = uloc_countAvailable();
506     int32_t locIndex;
507     UErrorCode errorCode = U_ZERO_ERROR;
508     const char *currLoc, *resolvedLoc;
509 
510     /* TODO: Compare against parent's data too. This code can't handle fallbacks that some tools do already. */
511 /*    char locName[ULOC_FULLNAME_CAPACITY];
512     char *locNamePtr;
513 
514     for (locIndex = 0; locIndex < locCount; locIndex++) {
515         errorCode=U_ZERO_ERROR;
516         strcpy(locName, uloc_getAvailable(locIndex));
517         locNamePtr = strrchr(locName, '_');
518         if (locNamePtr) {
519             *locNamePtr = 0;
520         }
521         else {
522             strcpy(locName, "root");
523         }
524 
525         root = ures_openDirect(NULL, locName, &errorCode);
526         if(U_FAILURE(errorCode)) {
527             log_err("Can't open %s\n", locName);
528             continue;
529         }
530 */
531     if (locCount <= 1) {
532         log_data_err("At least root needs to be installed\n");
533     }
534 
535     root = ures_openDirect(loadTestData(&errorCode), "structLocale", &errorCode);
536     if(U_FAILURE(errorCode)) {
537         log_data_err("Can't open structLocale\n");
538         return;
539     }
540     for (locIndex = 0; locIndex < locCount; locIndex++) {
541         errorCode=U_ZERO_ERROR;
542         currLoc = uloc_getAvailable(locIndex);
543         currentLocale = ures_open(NULL, currLoc, &errorCode);
544         if(errorCode != U_ZERO_ERROR) {
545             if(U_SUCCESS(errorCode)) {
546                 /* It's installed, but there is no data.
547                    It's installed for the g18n white paper [grhoten] */
548                 log_err("ERROR: Locale %-5s not installed, and it should be, err %s\n",
549                     uloc_getAvailable(locIndex), u_errorName(errorCode));
550             } else {
551                 log_err("%%%%%%% Unexpected error %d in %s %%%%%%%",
552                     u_errorName(errorCode),
553                     uloc_getAvailable(locIndex));
554             }
555             ures_close(currentLocale);
556             continue;
557         }
558         ures_getStringByKey(currentLocale, "Version", NULL, &errorCode);
559         if(errorCode != U_ZERO_ERROR) {
560             log_err("No version information is available for locale %s, and it should be!\n",
561                 currLoc);
562         }
563         else if (ures_getStringByKey(currentLocale, "Version", NULL, &errorCode)[0] == (UChar)(0x78)) {
564             log_verbose("WARNING: The locale %s is experimental! It shouldn't be listed as an installed locale.\n",
565                 currLoc);
566         }
567         resolvedLoc = ures_getLocaleByType(currentLocale, ULOC_ACTUAL_LOCALE, &errorCode);
568         if (strcmp(resolvedLoc, currLoc) != 0) {
569             /* All locales have at least a Version resource.
570                If it's absolutely empty, then the previous test will fail too.*/
571             log_err("Locale resolves to different locale. Is %s an alias of %s?\n",
572                 currLoc, resolvedLoc);
573         }
574         TestKeyInRootRecursive(root, "root", currentLocale, currLoc);
575 
576         testLCID(currentLocale, currLoc);
577 
578         ures_close(currentLocale);
579     }
580 
581     ures_close(root);
582 }
583 #endif
584 
585 static void
compareArrays(const char * keyName,UResourceBundle * fromArray,const char * fromLocale,UResourceBundle * toArray,const char * toLocale,int32_t start,int32_t end)586 compareArrays(const char *keyName,
587               UResourceBundle *fromArray, const char *fromLocale,
588               UResourceBundle *toArray, const char *toLocale,
589               int32_t start, int32_t end)
590 {
591     int32_t fromSize = ures_getSize(fromArray);
592     int32_t toSize = ures_getSize(fromArray);
593     int32_t idx;
594     UErrorCode errorCode = U_ZERO_ERROR;
595 
596     if (fromSize > toSize) {
597         fromSize = toSize;
598         log_err("Arrays are different size from \"%s\" to \"%s\"\n",
599                 fromLocale,
600                 toLocale);
601     }
602 
603     for (idx = start; idx <= end; idx++) {
604         const UChar *fromBundleStr = ures_getStringByIndex(fromArray, idx, NULL, &errorCode);
605         const UChar *toBundleStr = ures_getStringByIndex(toArray, idx, NULL, &errorCode);
606         if (fromBundleStr && toBundleStr && u_strcmp(fromBundleStr, toBundleStr) != 0)
607         {
608             log_err("Difference for %s at index %d from %s= \"%s\" to %s= \"%s\"\n",
609                     keyName,
610                     idx,
611                     fromLocale,
612                     austrdup(fromBundleStr),
613                     toLocale,
614                     austrdup(toBundleStr));
615         }
616     }
617 }
618 
619 static void
compareConsistentCountryInfo(const char * fromLocale,const char * toLocale)620 compareConsistentCountryInfo(const char *fromLocale, const char *toLocale) {
621     UErrorCode errorCode = U_ZERO_ERROR;
622     UResourceBundle *fromArray, *toArray;
623     UResourceBundle *fromLocaleBund = ures_open(NULL, fromLocale, &errorCode);
624     UResourceBundle *toLocaleBund = ures_open(NULL, toLocale, &errorCode);
625     UResourceBundle *toCalendar, *fromCalendar, *toGregorian, *fromGregorian;
626 
627     if(U_FAILURE(errorCode)) {
628         log_err("Can't open resource bundle %s or %s - %s\n", fromLocale, toLocale, u_errorName(errorCode));
629         return;
630     }
631     fromCalendar = ures_getByKey(fromLocaleBund, "calendar", NULL, &errorCode);
632     fromGregorian = ures_getByKeyWithFallback(fromCalendar, "gregorian", NULL, &errorCode);
633 
634     toCalendar = ures_getByKey(toLocaleBund, "calendar", NULL, &errorCode);
635     toGregorian = ures_getByKeyWithFallback(toCalendar, "gregorian", NULL, &errorCode);
636 
637     fromArray = ures_getByKey(fromLocaleBund, "CurrencyElements", NULL, &errorCode);
638     toArray = ures_getByKey(toLocaleBund, "CurrencyElements", NULL, &errorCode);
639     if (strcmp(fromLocale, "en_CA") != 0)
640     {
641         /* The first one is probably localized. */
642         compareArrays("CurrencyElements", fromArray, fromLocale, toArray, toLocale, 1, 2);
643     }
644     ures_close(fromArray);
645     ures_close(toArray);
646 
647     fromArray = ures_getByKey(fromLocaleBund, "NumberPatterns", NULL, &errorCode);
648     toArray = ures_getByKey(toLocaleBund, "NumberPatterns", NULL, &errorCode);
649     if (strcmp(fromLocale, "en_CA") != 0)
650     {
651         compareArrays("NumberPatterns", fromArray, fromLocale, toArray, toLocale, 0, 3);
652     }
653     ures_close(fromArray);
654     ures_close(toArray);
655 
656     /* Difficult to test properly */
657 /*
658     fromArray = ures_getByKey(fromLocaleBund, "DateTimePatterns", NULL, &errorCode);
659     toArray = ures_getByKey(toLocaleBund, "DateTimePatterns", NULL, &errorCode);
660     {
661         compareArrays("DateTimePatterns", fromArray, fromLocale, toArray, toLocale);
662     }
663     ures_close(fromArray);
664     ures_close(toArray);*/
665 
666     fromArray = ures_getByKey(fromLocaleBund, "NumberElements", NULL, &errorCode);
667     toArray = ures_getByKey(toLocaleBund, "NumberElements", NULL, &errorCode);
668     if (strcmp(fromLocale, "en_CA") != 0)
669     {
670         compareArrays("NumberElements", fromArray, fromLocale, toArray, toLocale, 0, 3);
671         /* Index 4 is a script based 0 */
672         compareArrays("NumberElements", fromArray, fromLocale, toArray, toLocale, 5, 10);
673     }
674     ures_close(fromArray);
675     ures_close(toArray);
676     ures_close(fromCalendar);
677     ures_close(toCalendar);
678     ures_close(fromGregorian);
679     ures_close(toGregorian);
680 
681     ures_close(fromLocaleBund);
682     ures_close(toLocaleBund);
683 }
684 
685 static void
TestConsistentCountryInfo(void)686 TestConsistentCountryInfo(void) {
687 /*    UResourceBundle *fromLocale, *toLocale;*/
688     int32_t locCount = uloc_countAvailable();
689     int32_t fromLocIndex, toLocIndex;
690 
691     int32_t fromCountryLen, toCountryLen;
692     char fromCountry[ULOC_FULLNAME_CAPACITY], toCountry[ULOC_FULLNAME_CAPACITY];
693 
694     int32_t fromVariantLen, toVariantLen;
695     char fromVariant[ULOC_FULLNAME_CAPACITY], toVariant[ULOC_FULLNAME_CAPACITY];
696 
697     UErrorCode errorCode = U_ZERO_ERROR;
698 
699     for (fromLocIndex = 0; fromLocIndex < locCount; fromLocIndex++) {
700         const char *fromLocale = uloc_getAvailable(fromLocIndex);
701 
702         errorCode=U_ZERO_ERROR;
703         fromCountryLen = uloc_getCountry(fromLocale, fromCountry, ULOC_FULLNAME_CAPACITY, &errorCode);
704         if (fromCountryLen <= 0) {
705             /* Ignore countryless locales */
706             continue;
707         }
708         fromVariantLen = uloc_getVariant(fromLocale, fromVariant, ULOC_FULLNAME_CAPACITY, &errorCode);
709         if (fromVariantLen > 0) {
710             /* Most variants are ignorable like collation variants. */
711             continue;
712         }
713         /* Start comparing only after the current index.
714            Previous loop should have already compared fromLocIndex.
715         */
716         for (toLocIndex = fromLocIndex + 1; toLocIndex < locCount; toLocIndex++) {
717             const char *toLocale = uloc_getAvailable(toLocIndex);
718 
719             toCountryLen = uloc_getCountry(toLocale, toCountry, ULOC_FULLNAME_CAPACITY, &errorCode);
720             if(U_FAILURE(errorCode)) {
721                 log_err("Unknown failure fromLocale=%s toLocale=%s errorCode=%s\n",
722                     fromLocale, toLocale, u_errorName(errorCode));
723                 continue;
724             }
725 
726             if (toCountryLen <= 0) {
727                 /* Ignore countryless locales */
728                 continue;
729             }
730             toVariantLen = uloc_getVariant(toLocale, toVariant, ULOC_FULLNAME_CAPACITY, &errorCode);
731             if (toVariantLen > 0) {
732                 /* Most variants are ignorable like collation variants. */
733                 /* They're a variant for a reason. */
734                 continue;
735             }
736             if (strcmp(fromCountry, toCountry) == 0) {
737                 log_verbose("comparing fromLocale=%s toLocale=%s\n",
738                     fromLocale, toLocale);
739                 compareConsistentCountryInfo(fromLocale, toLocale);
740             }
741         }
742     }
743 }
744 
745 static int32_t
findStringSetMismatch(const char * currLoc,const UChar * string,int32_t langSize,USet * mergedExemplarSet,UBool ignoreNumbers,UChar32 * badCharPtr)746 findStringSetMismatch(const char *currLoc, const UChar *string, int32_t langSize,
747                       USet * mergedExemplarSet,
748                       UBool ignoreNumbers, UChar32* badCharPtr) {
749     UErrorCode errorCode = U_ZERO_ERROR;
750     USet *exemplarSet;
751     int32_t strIdx;
752     if (mergedExemplarSet == NULL) {
753         return -1;
754     }
755     exemplarSet = createFlattenSet(mergedExemplarSet, &errorCode);
756     if (U_FAILURE(errorCode)) {
757         log_err("%s: error createFlattenSet returned %s\n", currLoc, u_errorName(errorCode));
758         return -1;
759     }
760 
761     for (strIdx = 0; strIdx < langSize;) {
762         UChar32 testChar;
763         U16_NEXT(string, strIdx, langSize, testChar);
764         if (!uset_contains(exemplarSet, testChar)
765             && testChar != 0x0020 && testChar != 0x00A0 && testChar != 0x002e && testChar != 0x002c && testChar != 0x002d && testChar != 0x0027
766             && testChar != 0x005B && testChar != 0x005D && testChar != 0x2019 && testChar != 0x0f0b && testChar != 0x200C && testChar != 0x200D) {
767             if (!ignoreNumbers || (ignoreNumbers && (testChar < 0x30 || testChar > 0x39))) {
768                 uset_close(exemplarSet);
769                 if (badCharPtr) {
770                     *badCharPtr = testChar;
771                 }
772                 return strIdx;
773             }
774         }
775     }
776     uset_close(exemplarSet);
777     if (badCharPtr) {
778         *badCharPtr = 0;
779     }
780     return -1;
781 }
782 /* include non-invariant chars */
783 static int32_t
myUCharsToChars(const UChar * us,char * cs,int32_t len)784 myUCharsToChars(const UChar* us, char* cs, int32_t len){
785     int32_t i=0;
786     for(; i< len; i++){
787         if(us[i] < 0x7f){
788             cs[i] = (char)us[i];
789         }else{
790             return -1;
791         }
792     }
793     return i;
794 }
795 static void
findSetMatch(UScriptCode * scriptCodes,int32_t scriptsLen,USet * exemplarSet,const char * locale)796 findSetMatch( UScriptCode *scriptCodes, int32_t scriptsLen,
797               USet *exemplarSet,
798               const char  *locale){
799     USet *scripts[10]= {0};
800     char pattern[256] = { '[', ':', 0x000 };
801     int32_t patternLen;
802     UChar uPattern[256] = {0};
803     UErrorCode status = U_ZERO_ERROR;
804     int32_t i;
805 
806     /* create the sets with script codes */
807     for(i = 0; i<scriptsLen; i++){
808         strcat(pattern, uscript_getShortName(scriptCodes[i]));
809         strcat(pattern, ":]");
810         patternLen = (int32_t)strlen(pattern);
811         u_charsToUChars(pattern, uPattern, patternLen);
812         scripts[i] = uset_openPattern(uPattern, patternLen, &status);
813         if(U_FAILURE(status)){
814             log_err("Could not create set for pattern %s. Error: %s\n", pattern, u_errorName(status));
815             return;
816         }
817         pattern[2] = 0;
818     }
819     if (strcmp(locale, "uk") == 0 || strcmp(locale, "uk_UA") == 0) {
820         /* Special addition. Add the modifying apostrophe, which isn't in Cyrillic. */
821         uset_add(scripts[0], 0x2bc);
822     }
823     if(U_SUCCESS(status)){
824         UBool existsInScript = FALSE;
825         /* iterate over the exemplarSet and ascertain if all
826          * UChars in exemplarSet belong to the scripts returned
827          * by getScript
828          */
829         int32_t count = uset_getItemCount(exemplarSet);
830 
831         for( i=0; i < count; i++){
832             UChar32 start = 0;
833             UChar32 end = 0;
834             UChar *str = NULL;
835             int32_t strCapacity = 0;
836 
837             strCapacity = uset_getItem(exemplarSet, i, &start, &end, str, strCapacity, &status);
838             if(U_SUCCESS(status)){
839                 int32_t j;
840                 if(strCapacity == 0){
841                     /* ok the item is a range */
842                      for( j = 0; j < scriptsLen; j++){
843                         if(uset_containsRange(scripts[j], start, end) == TRUE){
844                             existsInScript = TRUE;
845                         }
846                     }
847                     if(existsInScript == FALSE){
848                         for( j = 0; j < scriptsLen; j++){
849                             UChar toPattern[500]={'\0'};
850                             char pat[500]={'\0'};
851                             int32_t len = uset_toPattern(scripts[j], toPattern, 500, TRUE, &status);
852                             len = myUCharsToChars(toPattern, pat, len);
853                             log_err("uset_indexOf(\\u%04X)=%i uset_indexOf(\\u%04X)=%i\n", start, uset_indexOf(scripts[0], start), end, uset_indexOf(scripts[0], end));
854                             if(len!=-1){
855                                 log_err("Pattern: %s\n",pat);
856                             }
857                         }
858                         log_err("ExemplarCharacters and LocaleScript containment test failed for locale %s. \n", locale);
859                     }
860                 }else{
861                     strCapacity++; /* increment for NUL termination */
862                     /* allocate the str and call the api again */
863                     str = (UChar*) malloc(U_SIZEOF_UCHAR * strCapacity);
864                     strCapacity =  uset_getItem(exemplarSet, i, &start, &end, str, strCapacity, &status);
865                     /* iterate over the scripts and figure out if the string contained is actually
866                      * in the script set
867                      */
868                     for( j = 0; j < scriptsLen; j++){
869                         if(uset_containsString(scripts[j],str, strCapacity) == TRUE){
870                             existsInScript = TRUE;
871                         }
872                     }
873                     if(existsInScript == FALSE){
874                         log_err("ExemplarCharacters and LocaleScript containment test failed for locale %s. \n", locale);
875                     }
876                 }
877             }
878         }
879 
880     }
881 
882     /* close the sets */
883     for(i = 0; i<scriptsLen; i++){
884         uset_close(scripts[i]);
885     }
886 }
887 
VerifyTranslation(void)888 static void VerifyTranslation(void) {
889     UResourceBundle *root, *currentLocale;
890     int32_t locCount = uloc_countAvailable();
891     int32_t locIndex;
892     UErrorCode errorCode = U_ZERO_ERROR;
893     const char *currLoc;
894     UScriptCode scripts[USCRIPT_CODE_LIMIT];
895     int32_t numScripts;
896     int32_t idx;
897     int32_t end;
898     UResourceBundle *resArray;
899 
900     if (locCount <= 1) {
901         log_data_err("At least root needs to be installed\n");
902     }
903 
904     root = ures_openDirect(NULL, "root", &errorCode);
905     if(U_FAILURE(errorCode)) {
906         log_data_err("Can't open root\n");
907         return;
908     }
909     for (locIndex = 0; locIndex < locCount; locIndex++) {
910         USet * mergedExemplarSet = NULL;
911         errorCode=U_ZERO_ERROR;
912         currLoc = uloc_getAvailable(locIndex);
913         currentLocale = ures_open(NULL, currLoc, &errorCode);
914         if(errorCode != U_ZERO_ERROR) {
915             if(U_SUCCESS(errorCode)) {
916                 /* It's installed, but there is no data.
917                    It's installed for the g18n white paper [grhoten] */
918                 log_err("ERROR: Locale %-5s not installed, and it should be!\n",
919                     uloc_getAvailable(locIndex));
920             } else {
921                 log_err("%%%%%%% Unexpected error %d in %s %%%%%%%",
922                     u_errorName(errorCode),
923                     uloc_getAvailable(locIndex));
924             }
925             ures_close(currentLocale);
926             continue;
927         }
928         {
929             UErrorCode exemplarStatus = U_ZERO_ERROR;
930             ULocaleData * uld = ulocdata_open(currLoc, &exemplarStatus);
931             if (U_SUCCESS(exemplarStatus)) {
932                 USet * exemplarSet = ulocdata_getExemplarSet(uld, NULL, USET_ADD_CASE_MAPPINGS, ULOCDATA_ES_STANDARD, &exemplarStatus);
933                 if (U_SUCCESS(exemplarStatus)) {
934                     mergedExemplarSet = uset_cloneAsThawed(exemplarSet);
935                     uset_close(exemplarSet);
936                     exemplarSet = ulocdata_getExemplarSet(uld, NULL, USET_ADD_CASE_MAPPINGS, ULOCDATA_ES_AUXILIARY, &exemplarStatus);
937                     if (U_SUCCESS(exemplarStatus)) {
938                         uset_addAll(mergedExemplarSet, exemplarSet);
939                         uset_close(exemplarSet);
940                     }
941                     exemplarStatus = U_ZERO_ERROR;
942                     exemplarSet = ulocdata_getExemplarSet(uld, NULL, 0, ULOCDATA_ES_PUNCTUATION, &exemplarStatus);
943                     if (U_SUCCESS(exemplarStatus)) {
944                         uset_addAll(mergedExemplarSet, exemplarSet);
945                         uset_close(exemplarSet);
946                     }
947                 } else {
948                     log_err("error ulocdata_getExemplarSet (main) for locale %s returned %s\n", currLoc, u_errorName(errorCode));
949                 }
950                 ulocdata_close(uld);
951             } else {
952                 log_err("error ulocdata_open for locale %s returned %s\n", currLoc, u_errorName(errorCode));
953             }
954         }
955         if (mergedExemplarSet == NULL /*|| (getTestOption(QUICK_OPTION) && uset_size() > 2048)*/) {
956             log_verbose("skipping test for %s\n", currLoc);
957         }
958         //else if (uprv_strncmp(currLoc,"bem",3) == 0 || uprv_strncmp(currLoc,"mgo",3) == 0 || uprv_strncmp(currLoc,"nl",2) == 0) {
959         //    log_verbose("skipping test for %s, some month and country names known to use aux exemplars\n", currLoc);
960         //}
961         else {
962             UChar langBuffer[128];
963             int32_t langSize;
964             int32_t strIdx;
965             UChar32 badChar;
966             langSize = uloc_getDisplayLanguage(currLoc, currLoc, langBuffer, UPRV_LENGTHOF(langBuffer), &errorCode);
967             if (U_FAILURE(errorCode)) {
968                 log_err("error uloc_getDisplayLanguage returned %s\n", u_errorName(errorCode));
969             }
970             else {
971                 strIdx = findStringSetMismatch(currLoc, langBuffer, langSize, mergedExemplarSet, FALSE, &badChar);
972                 if (strIdx >= 0) {
973                     log_err("getDisplayLanguage(%s) at index %d returned characters not in the exemplar characters: %04X.\n",
974                         currLoc, strIdx, badChar);
975                 }
976             }
977             langSize = uloc_getDisplayCountry(currLoc, currLoc, langBuffer, UPRV_LENGTHOF(langBuffer), &errorCode);
978             if (U_FAILURE(errorCode)) {
979                 log_err("error uloc_getDisplayCountry returned %s\n", u_errorName(errorCode));
980             }
981             {
982                 UResourceBundle* cal = ures_getByKey(currentLocale, "calendar", NULL, &errorCode);
983                 UResourceBundle* greg = ures_getByKeyWithFallback(cal, "gregorian", NULL, &errorCode);
984                 UResourceBundle* names = ures_getByKeyWithFallback(greg,  "dayNames", NULL, &errorCode);
985                 UResourceBundle* format = ures_getByKeyWithFallback(names,  "format", NULL, &errorCode);
986                 resArray = ures_getByKeyWithFallback(format,  "wide", NULL, &errorCode);
987 
988                 if (U_FAILURE(errorCode)) {
989                     log_err("error ures_getByKey returned %s\n", u_errorName(errorCode));
990                 }
991                 if (getTestOption(QUICK_OPTION)) {
992                     end = 1;
993                 }
994                 else {
995                     end = ures_getSize(resArray);
996                 }
997 
998                 if ((uprv_strncmp(currLoc,"lrc",3) == 0 || uprv_strncmp(currLoc,"mzn",3) == 0) &&
999                         log_knownIssue("cldrbug:8899", "lrc and mzn locales don't have translated day names")) {
1000                     end = 0;
1001                 }
1002 
1003                 for (idx = 0; idx < end; idx++) {
1004                     const UChar *fromBundleStr = ures_getStringByIndex(resArray, idx, &langSize, &errorCode);
1005                     if (U_FAILURE(errorCode)) {
1006                         log_err("error ures_getStringByIndex(%d) returned %s\n", idx, u_errorName(errorCode));
1007                         continue;
1008                     }
1009                     strIdx = findStringSetMismatch(currLoc, fromBundleStr, langSize, mergedExemplarSet, TRUE, &badChar);
1010                     if ( strIdx >= 0 ) {
1011                         log_err("getDayNames(%s, %d) at index %d returned characters not in the exemplar characters: %04X.\n",
1012                             currLoc, idx, strIdx, badChar);
1013                     }
1014                 }
1015                 ures_close(resArray);
1016                 ures_close(format);
1017                 ures_close(names);
1018 
1019                 names = ures_getByKeyWithFallback(greg, "monthNames", NULL, &errorCode);
1020                 format = ures_getByKeyWithFallback(names,"format", NULL, &errorCode);
1021                 resArray = ures_getByKeyWithFallback(format, "wide", NULL, &errorCode);
1022                 if (U_FAILURE(errorCode)) {
1023                     log_err("error ures_getByKey returned %s\n", u_errorName(errorCode));
1024                 }
1025                 if (getTestOption(QUICK_OPTION)) {
1026                     end = 1;
1027                 }
1028                 else {
1029                     end = ures_getSize(resArray);
1030                 }
1031 
1032                 for (idx = 0; idx < end; idx++) {
1033                     const UChar *fromBundleStr = ures_getStringByIndex(resArray, idx, &langSize, &errorCode);
1034                     if (U_FAILURE(errorCode)) {
1035                         log_err("error ures_getStringByIndex(%d) returned %s\n", idx, u_errorName(errorCode));
1036                         continue;
1037                     }
1038                     strIdx = findStringSetMismatch(currLoc, fromBundleStr, langSize, mergedExemplarSet, TRUE, &badChar);
1039                     if (strIdx >= 0) {
1040                         log_err("getMonthNames(%s, %d) at index %d returned characters not in the exemplar characters: %04X.\n",
1041                             currLoc, idx, strIdx, badChar);
1042                     }
1043                 }
1044                 ures_close(resArray);
1045                 ures_close(format);
1046                 ures_close(names);
1047                 ures_close(greg);
1048                 ures_close(cal);
1049             }
1050             errorCode = U_ZERO_ERROR;
1051             numScripts = uscript_getCode(currLoc, scripts, UPRV_LENGTHOF(scripts), &errorCode);
1052             if (strcmp(currLoc, "yi") == 0 && numScripts > 0 && log_knownIssue("11217", "Fix result of uscript_getCode for yi: USCRIPT_YI -> USCRIPT_HEBREW")) {
1053                 scripts[0] = USCRIPT_HEBREW;
1054             }
1055             if (numScripts == 0) {
1056                 log_err("uscript_getCode(%s) doesn't work.\n", currLoc);
1057             }else if(scripts[0] == USCRIPT_COMMON){
1058                 log_err("uscript_getCode(%s) returned USCRIPT_COMMON.\n", currLoc);
1059             }
1060 
1061             /* test that the scripts are a superset of exemplar characters. */
1062            {
1063                 ULocaleData *uld = ulocdata_open(currLoc,&errorCode);
1064                 USet *exemplarSet =  ulocdata_getExemplarSet(uld, NULL, 0, ULOCDATA_ES_STANDARD, &errorCode);
1065                 /* test if exemplar characters are part of script code */
1066                 findSetMatch(scripts, numScripts, exemplarSet, currLoc);
1067                 uset_close(exemplarSet);
1068                 ulocdata_close(uld);
1069             }
1070 
1071            /* test that the paperSize API works */
1072            {
1073                int32_t height=0, width=0;
1074                ulocdata_getPaperSize(currLoc, &height, &width, &errorCode);
1075                if(U_FAILURE(errorCode)){
1076                    log_err("ulocdata_getPaperSize failed for locale %s with error: %s \n", currLoc, u_errorName(errorCode));
1077                }
1078                if(strstr(currLoc, "_US")!=NULL && height != 279 && width != 216 ){
1079                    log_err("ulocdata_getPaperSize did not return expected data for locale %s \n", currLoc);
1080                }
1081            }
1082             /* test that the MeasurementSystem API works */
1083            {
1084                char fullLoc[ULOC_FULLNAME_CAPACITY];
1085                UMeasurementSystem measurementSystem;
1086                int32_t height = 0, width = 0;
1087 
1088                uloc_addLikelySubtags(currLoc, fullLoc, ULOC_FULLNAME_CAPACITY, &errorCode);
1089 
1090                errorCode = U_ZERO_ERROR;
1091                measurementSystem = ulocdata_getMeasurementSystem(currLoc, &errorCode);
1092                if (U_FAILURE(errorCode)) {
1093                    log_err("ulocdata_getMeasurementSystem failed for locale %s with error: %s \n", currLoc, u_errorName(errorCode));
1094                } else {
1095                    if ( strstr(fullLoc, "_US")!=NULL || strstr(fullLoc, "_MM")!=NULL || strstr(fullLoc, "_LR")!=NULL ) {
1096                        if(measurementSystem != UMS_US){
1097                             log_err("ulocdata_getMeasurementSystem did not return expected data for locale %s \n", currLoc);
1098                        }
1099                    } else if ( strstr(fullLoc, "_GB")!=NULL ) {
1100                        if(measurementSystem != UMS_UK){
1101                             log_err("ulocdata_getMeasurementSystem did not return expected data for locale %s \n", currLoc);
1102                        }
1103                    } else if (measurementSystem != UMS_SI) {
1104                        log_err("ulocdata_getMeasurementSystem did not return expected data for locale %s \n", currLoc);
1105                    }
1106                }
1107 
1108                errorCode = U_ZERO_ERROR;
1109                ulocdata_getPaperSize(currLoc, &height, &width, &errorCode);
1110                if (U_FAILURE(errorCode)) {
1111                    log_err("ulocdata_getPaperSize failed for locale %s with error: %s \n", currLoc, u_errorName(errorCode));
1112                } else {
1113                    if ( strstr(fullLoc, "_US")!=NULL || strstr(fullLoc, "_BZ")!=NULL || strstr(fullLoc, "_CA")!=NULL || strstr(fullLoc, "_CL")!=NULL ||
1114                         strstr(fullLoc, "_CO")!=NULL || strstr(fullLoc, "_CR")!=NULL || strstr(fullLoc, "_GT")!=NULL || strstr(fullLoc, "_MX")!=NULL ||
1115                         strstr(fullLoc, "_NI")!=NULL || strstr(fullLoc, "_PA")!=NULL || strstr(fullLoc, "_PH")!=NULL || strstr(fullLoc, "_PR")!=NULL ||
1116                         strstr(fullLoc, "_SV")!=NULL || strstr(fullLoc, "_VE")!=NULL ) {
1117                        if (height != 279 || width != 216) {
1118                             log_err("ulocdata_getPaperSize did not return expected data for locale %s \n", currLoc);
1119                        }
1120                    } else if (height != 297 || width != 210) {
1121                        log_err("ulocdata_getPaperSize did not return expected data for locale %s \n", currLoc);
1122                    }
1123                }
1124            }
1125         }
1126         if (mergedExemplarSet != NULL) {
1127             uset_close(mergedExemplarSet);
1128         }
1129         ures_close(currentLocale);
1130     }
1131 
1132     ures_close(root);
1133 }
1134 
1135 /* adjust this limit as appropriate */
1136 #define MAX_SCRIPTS_PER_LOCALE 8
1137 
TestExemplarSet(void)1138 static void TestExemplarSet(void){
1139     int32_t i, j, k, m, n;
1140     int32_t equalCount = 0;
1141     UErrorCode ec = U_ZERO_ERROR;
1142     UEnumeration* avail;
1143     USet* exemplarSets[2];
1144     USet* unassignedSet;
1145     UScriptCode code[MAX_SCRIPTS_PER_LOCALE];
1146     USet* codeSets[MAX_SCRIPTS_PER_LOCALE];
1147     int32_t codeLen;
1148     char cbuf[32]; /* 9 should be enough */
1149     UChar ubuf[64]; /* adjust as needed */
1150     UBool existsInScript;
1151     int32_t itemCount;
1152     int32_t strLen;
1153     UChar32 start, end;
1154 
1155     unassignedSet = NULL;
1156     exemplarSets[0] = NULL;
1157     exemplarSets[1] = NULL;
1158     for (i=0; i<MAX_SCRIPTS_PER_LOCALE; ++i) {
1159         codeSets[i] = NULL;
1160     }
1161 
1162     avail = ures_openAvailableLocales(NULL, &ec);
1163     if (!assertSuccess("ures_openAvailableLocales", &ec)) goto END;
1164     n = uenum_count(avail, &ec);
1165     if (!assertSuccess("uenum_count", &ec)) goto END;
1166 
1167     u_uastrcpy(ubuf, "[:unassigned:]");
1168     unassignedSet = uset_openPattern(ubuf, -1, &ec);
1169     if (!assertSuccess("uset_openPattern", &ec)) goto END;
1170 
1171     for(i=0; i<n; i++){
1172         const char* locale = uenum_next(avail, NULL, &ec);
1173         if (!assertSuccess("uenum_next", &ec)) goto END;
1174         log_verbose("%s\n", locale);
1175         for (k=0; k<2; ++k) {
1176             uint32_t option = (k==0) ? 0 : USET_CASE_INSENSITIVE;
1177             ULocaleData *uld = ulocdata_open(locale,&ec);
1178             USet* exemplarSet = ulocdata_getExemplarSet(uld,NULL, option, ULOCDATA_ES_STANDARD, &ec);
1179             uset_close(exemplarSets[k]);
1180             ulocdata_close(uld);
1181             exemplarSets[k] = exemplarSet;
1182             if (!assertSuccess("ulocaledata_getExemplarSet", &ec)) goto END;
1183 
1184             if (uset_containsSome(exemplarSet, unassignedSet)) {
1185                 log_err("ExemplarSet contains unassigned characters for locale : %s\n", locale);
1186             }
1187             codeLen = uscript_getCode(locale, code, 8, &ec);
1188             if (strcmp(locale, "yi") == 0 && codeLen > 0 && log_knownIssue("11217", "Fix result of uscript_getCode for yi: USCRIPT_YI -> USCRIPT_HEBREW")) {
1189                 code[0] = USCRIPT_HEBREW;
1190             }
1191             if (!assertSuccess("uscript_getCode", &ec)) goto END;
1192 
1193             for (j=0; j<MAX_SCRIPTS_PER_LOCALE; ++j) {
1194                 uset_close(codeSets[j]);
1195                 codeSets[j] = NULL;
1196             }
1197             for (j=0; j<codeLen; ++j) {
1198                 uprv_strcpy(cbuf, "[:");
1199                 if(code[j]==-1){
1200                     log_err("USCRIPT_INVALID_CODE returned for locale: %s\n", locale);
1201                     continue;
1202                 }
1203                 uprv_strcat(cbuf, uscript_getShortName(code[j]));
1204                 uprv_strcat(cbuf, ":]");
1205                 u_uastrcpy(ubuf, cbuf);
1206                 codeSets[j] = uset_openPattern(ubuf, -1, &ec);
1207             }
1208             if (!assertSuccess("uset_openPattern", &ec)) goto END;
1209 
1210             existsInScript = FALSE;
1211             itemCount = uset_getItemCount(exemplarSet);
1212             for (m=0; m<itemCount && !existsInScript; ++m) {
1213                 strLen = uset_getItem(exemplarSet, m, &start, &end, ubuf,
1214                                       UPRV_LENGTHOF(ubuf), &ec);
1215                 /* failure here might mean str[] needs to be larger */
1216                 if (!assertSuccess("uset_getItem", &ec)) goto END;
1217                 if (strLen == 0) {
1218                     for (j=0; j<codeLen; ++j) {
1219                         if (codeSets[j]!=NULL && uset_containsRange(codeSets[j], start, end)) {
1220                             existsInScript = TRUE;
1221                             break;
1222                         }
1223                     }
1224                 } else {
1225                     for (j=0; j<codeLen; ++j) {
1226                         if (codeSets[j]!=NULL && uset_containsString(codeSets[j], ubuf, strLen)) {
1227                             existsInScript = TRUE;
1228                             break;
1229                         }
1230                     }
1231                 }
1232             }
1233 
1234             if (existsInScript == FALSE){
1235                 log_err("ExemplarSet containment failed for locale : %s\n", locale);
1236             }
1237         }
1238         assertTrue("case-folded is a superset",
1239                    uset_containsAll(exemplarSets[1], exemplarSets[0]));
1240         if (uset_equals(exemplarSets[1], exemplarSets[0])) {
1241             ++equalCount;
1242         }
1243     }
1244     /* Note: The case-folded set should sometimes be a strict superset
1245        and sometimes be equal. */
1246     assertTrue("case-folded is sometimes a strict superset, and sometimes equal",
1247                equalCount > 0 && equalCount < n);
1248 
1249  END:
1250     uenum_close(avail);
1251     uset_close(exemplarSets[0]);
1252     uset_close(exemplarSets[1]);
1253     uset_close(unassignedSet);
1254     for (i=0; i<MAX_SCRIPTS_PER_LOCALE; ++i) {
1255         uset_close(codeSets[i]);
1256     }
1257 }
1258 
1259 enum { kUBufMax = 32 };
TestLocaleDisplayPattern(void)1260 static void TestLocaleDisplayPattern(void){
1261     UErrorCode status;
1262     UChar pattern[kUBufMax] = {0,};
1263     UChar separator[kUBufMax] = {0,};
1264     ULocaleData *uld;
1265     static const UChar enExpectPat[] = { 0x007B,0x0030,0x007D,0x0020,0x0028,0x007B,0x0031,0x007D,0x0029,0 }; /* "{0} ({1})" */
1266     static const UChar enExpectSep[] = { 0x002C,0x0020,0 }; /* ", " */
1267     static const UChar zhExpectPat[] = { 0x007B,0x0030,0x007D,0xFF08,0x007B,0x0031,0x007D,0xFF09,0 };
1268     static const UChar zhExpectSep[] = { 0xFF0C,0 };
1269 
1270     status = U_ZERO_ERROR;
1271     uld = ulocdata_open("en", &status);
1272     if(U_FAILURE(status)){
1273         log_data_err("ulocdata_open en error %s", u_errorName(status));
1274     } else {
1275         ulocdata_getLocaleDisplayPattern(uld, pattern, kUBufMax, &status);
1276         if (U_FAILURE(status)){
1277             log_err("ulocdata_getLocaleDisplayPattern en error %s", u_errorName(status));
1278         } else if (u_strcmp(pattern, enExpectPat) != 0) {
1279              log_err("ulocdata_getLocaleDisplayPattern en returns unexpected pattern");
1280         }
1281         status = U_ZERO_ERROR;
1282         ulocdata_getLocaleSeparator(uld, separator, kUBufMax, &status);
1283         if (U_FAILURE(status)){
1284             log_err("ulocdata_getLocaleSeparator en error %s", u_errorName(status));
1285         } else if (u_strcmp(separator, enExpectSep) != 0) {
1286              log_err("ulocdata_getLocaleSeparator en returns unexpected string ");
1287         }
1288         ulocdata_close(uld);
1289     }
1290 
1291     status = U_ZERO_ERROR;
1292     uld = ulocdata_open("zh", &status);
1293     if(U_FAILURE(status)){
1294         log_data_err("ulocdata_open zh error %s", u_errorName(status));
1295     } else {
1296         ulocdata_getLocaleDisplayPattern(uld, pattern, kUBufMax, &status);
1297         if (U_FAILURE(status)){
1298             log_err("ulocdata_getLocaleDisplayPattern zh error %s", u_errorName(status));
1299         } else if (u_strcmp(pattern, zhExpectPat) != 0) {
1300              log_err("ulocdata_getLocaleDisplayPattern zh returns unexpected pattern");
1301         }
1302         status = U_ZERO_ERROR;
1303         ulocdata_getLocaleSeparator(uld, separator, kUBufMax, &status);
1304         if (U_FAILURE(status)){
1305             log_err("ulocdata_getLocaleSeparator zh error %s", u_errorName(status));
1306         } else if (u_strcmp(separator, zhExpectSep) != 0) {
1307              log_err("ulocdata_getLocaleSeparator zh returns unexpected string ");
1308         }
1309         ulocdata_close(uld);
1310     }
1311 }
1312 
TestCoverage(void)1313 static void TestCoverage(void){
1314     ULocaleDataDelimiterType types[] = {
1315      ULOCDATA_QUOTATION_START,     /* Quotation start */
1316      ULOCDATA_QUOTATION_END,       /* Quotation end */
1317      ULOCDATA_ALT_QUOTATION_START, /* Alternate quotation start */
1318      ULOCDATA_ALT_QUOTATION_END,   /* Alternate quotation end */
1319      ULOCDATA_DELIMITER_COUNT
1320     };
1321     int i;
1322     UBool sub;
1323     UErrorCode status = U_ZERO_ERROR;
1324     ULocaleData *uld = ulocdata_open(uloc_getDefault(), &status);
1325 
1326     if(U_FAILURE(status)){
1327         log_data_err("ulocdata_open error");
1328         return;
1329     }
1330 
1331 
1332     for(i = 0; i < ULOCDATA_DELIMITER_COUNT; i++){
1333         UChar result[32] = {0,};
1334         status = U_ZERO_ERROR;
1335         ulocdata_getDelimiter(uld, types[i], result, 32, &status);
1336         if (U_FAILURE(status)){
1337             log_err("ulocdata_getgetDelimiter error with type %d", types[i]);
1338         }
1339     }
1340 
1341     sub = ulocdata_getNoSubstitute(uld);
1342     ulocdata_setNoSubstitute(uld,sub);
1343     ulocdata_close(uld);
1344 }
1345 
TestIndexChars(void)1346 static void TestIndexChars(void) {
1347     /* Very basic test of ULOCDATA_ES_INDEX.
1348      * No comprehensive test of data, just basic check that the code path is alive.
1349      */
1350     UErrorCode status = U_ZERO_ERROR;
1351     ULocaleData  *uld;
1352     USet *exemplarChars;
1353     USet *indexChars;
1354 
1355     uld = ulocdata_open("en", &status);
1356     exemplarChars = uset_openEmpty();
1357     indexChars = uset_openEmpty();
1358     ulocdata_getExemplarSet(uld, exemplarChars, 0, ULOCDATA_ES_STANDARD, &status);
1359     ulocdata_getExemplarSet(uld, indexChars, 0, ULOCDATA_ES_INDEX, &status);
1360     if (U_FAILURE(status)) {
1361         log_data_err("File %s, line %d, Failure opening exemplar chars: %s", __FILE__, __LINE__, u_errorName(status));
1362         goto close_sets;
1363     }
1364     /* en data, standard exemplars are [a-z], lower case. */
1365     /* en data, index characters are [A-Z], upper case. */
1366     if ((uset_contains(exemplarChars, (UChar32)0x41) || uset_contains(indexChars, (UChar32)0x61))) {
1367         log_err("File %s, line %d, Exemplar characters incorrect.", __FILE__, __LINE__ );
1368         goto close_sets;
1369     }
1370     if (!(uset_contains(exemplarChars, (UChar32)0x61) && uset_contains(indexChars, (UChar32)0x41) )) {
1371         log_err("File %s, line %d, Exemplar characters incorrect.", __FILE__, __LINE__ );
1372         goto close_sets;
1373     }
1374 
1375   close_sets:
1376     uset_close(exemplarChars);
1377     uset_close(indexChars);
1378     ulocdata_close(uld);
1379 }
1380 
1381 
1382 
1383 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
TestCurrencyList(void)1384 static void TestCurrencyList(void){
1385 #if !UCONFIG_NO_FORMATTING
1386     UErrorCode errorCode = U_ZERO_ERROR;
1387     int32_t structLocaleCount, currencyCount;
1388     UEnumeration *en = ucurr_openISOCurrencies(UCURR_ALL, &errorCode);
1389     const char *isoCode, *structISOCode;
1390     UResourceBundle *subBundle;
1391     UResourceBundle *currencies = ures_openDirect(loadTestData(&errorCode), "structLocale", &errorCode);
1392     if(U_FAILURE(errorCode)) {
1393         log_data_err("Can't open structLocale\n");
1394         return;
1395     }
1396     currencies = ures_getByKey(currencies, "Currencies", currencies, &errorCode);
1397     currencyCount = uenum_count(en, &errorCode);
1398     structLocaleCount = ures_getSize(currencies);
1399     if (currencyCount != structLocaleCount) {
1400         log_err("structLocale(%d) and ISO4217(%d) currency list are out of sync.\n", structLocaleCount, currencyCount);
1401 #if U_CHARSET_FAMILY == U_ASCII_FAMILY
1402         ures_resetIterator(currencies);
1403         while ((isoCode = uenum_next(en, NULL, &errorCode)) != NULL && ures_hasNext(currencies)) {
1404             subBundle = ures_getNextResource(currencies, NULL, &errorCode);
1405             structISOCode = ures_getKey(subBundle);
1406             ures_close(subBundle);
1407             if (strcmp(structISOCode, isoCode) != 0) {
1408                 log_err("First difference found at structLocale(%s) and ISO4217(%s).\n", structISOCode, isoCode);
1409                 break;
1410             }
1411         }
1412 #endif
1413     }
1414     ures_close(currencies);
1415     uenum_close(en);
1416 #endif
1417 }
1418 #endif
1419 
TestAvailableIsoCodes(void)1420 static void TestAvailableIsoCodes(void){
1421 #if !UCONFIG_NO_FORMATTING
1422     UErrorCode errorCode = U_ZERO_ERROR;
1423     const char* eurCode = "EUR";
1424     const char* usdCode = "USD";
1425     const char* lastCode = "RHD";
1426     const char* zzzCode = "ZZZ";
1427     UDate date1950 = (UDate)-630720000000.0;/* year 1950 */
1428     UDate date1970 = (UDate)0.0;            /* year 1970 */
1429     UDate date1975 = (UDate)173448000000.0; /* year 1975 */
1430     UDate date1978 = (UDate)260172000000.0; /* year 1978 */
1431     UDate date1981 = (UDate)346896000000.0; /* year 1981 */
1432     UDate date1992 = (UDate)693792000000.0; /* year 1992 */
1433     UChar* isoCode = (UChar*)malloc(sizeof(UChar) * (uprv_strlen(usdCode) + 1));
1434 
1435     /* testing available codes with no time ranges */
1436     u_charsToUChars(eurCode, isoCode, (int32_t)uprv_strlen(usdCode) + 1);
1437     if (ucurr_isAvailable(isoCode, U_DATE_MIN, U_DATE_MAX, &errorCode) == FALSE) {
1438        log_data_err("FAIL: ISO code (%s) is not found.\n", eurCode);
1439     }
1440 
1441     u_charsToUChars(usdCode, isoCode, (int32_t)uprv_strlen(zzzCode) + 1);
1442     if (ucurr_isAvailable(isoCode, U_DATE_MIN, U_DATE_MAX, &errorCode) == FALSE) {
1443        log_data_err("FAIL: ISO code (%s) is not found.\n", usdCode);
1444     }
1445 
1446     u_charsToUChars(zzzCode, isoCode, (int32_t)uprv_strlen(zzzCode) + 1);
1447     if (ucurr_isAvailable(isoCode, U_DATE_MIN, U_DATE_MAX, &errorCode) == TRUE) {
1448        log_err("FAIL: ISO code (%s) is reported as available, but it doesn't exist.\n", zzzCode);
1449     }
1450 
1451     u_charsToUChars(lastCode, isoCode, (int32_t)uprv_strlen(zzzCode) + 1);
1452     if (ucurr_isAvailable(isoCode, U_DATE_MIN, U_DATE_MAX, &errorCode) == FALSE) {
1453        log_data_err("FAIL: ISO code (%s) is not found.\n", lastCode);
1454     }
1455 
1456     /* RHD was used from 1970-02-17  to 1980-04-18*/
1457 
1458     /* to = null */
1459     if (ucurr_isAvailable(isoCode, date1970, U_DATE_MAX, &errorCode) == FALSE) {
1460        log_data_err("FAIL: ISO code (%s) was available in time range >1970-01-01.\n", lastCode);
1461     }
1462 
1463     if (ucurr_isAvailable(isoCode, date1975, U_DATE_MAX, &errorCode) == FALSE) {
1464        log_data_err("FAIL: ISO code (%s) was available in time range >1975.\n", lastCode);
1465     }
1466 
1467     if (ucurr_isAvailable(isoCode, date1981, U_DATE_MAX, &errorCode) == TRUE) {
1468        log_err("FAIL: ISO code (%s) was not available in time range >1981.\n", lastCode);
1469     }
1470 
1471     /* from = null */
1472     if (ucurr_isAvailable(isoCode, U_DATE_MIN, date1970, &errorCode) == TRUE) {
1473        log_err("FAIL: ISO code (%s) was not available in time range <1970.\n", lastCode);
1474     }
1475 
1476     if (ucurr_isAvailable(isoCode, U_DATE_MIN, date1975, &errorCode) == FALSE) {
1477        log_data_err("FAIL: ISO code (%s) was available in time range <1975.\n", lastCode);
1478     }
1479 
1480     if (ucurr_isAvailable(isoCode, U_DATE_MIN, date1981, &errorCode) == FALSE) {
1481        log_data_err("FAIL: ISO code (%s) was available in time range <1981.\n", lastCode);
1482     }
1483 
1484     /* full ranges */
1485     if (ucurr_isAvailable(isoCode, date1975, date1978, &errorCode) == FALSE) {
1486        log_data_err("FAIL: ISO code (%s) was available in time range 1975-1978.\n", lastCode);
1487     }
1488 
1489     if (ucurr_isAvailable(isoCode, date1970, date1975, &errorCode) == FALSE) {
1490        log_data_err("FAIL: ISO code (%s) was available in time range 1970-1975.\n", lastCode);
1491     }
1492 
1493     if (ucurr_isAvailable(isoCode, date1975, date1981, &errorCode) == FALSE) {
1494        log_data_err("FAIL: ISO code (%s) was available in time range 1975-1981.\n", lastCode);
1495     }
1496 
1497     if (ucurr_isAvailable(isoCode, date1970,  date1981, &errorCode) == FALSE) {
1498        log_data_err("FAIL: ISO code (%s) was available in time range 1970-1981.\n", lastCode);
1499     }
1500 
1501     if (ucurr_isAvailable(isoCode, date1981,  date1992, &errorCode) == TRUE) {
1502        log_err("FAIL: ISO code (%s) was not available in time range 1981-1992.\n", lastCode);
1503     }
1504 
1505     if (ucurr_isAvailable(isoCode, date1950,  date1970, &errorCode) == TRUE) {
1506        log_err("FAIL: ISO code (%s) was not available in time range 1950-1970.\n", lastCode);
1507     }
1508 
1509     /* wrong range - from > to*/
1510     if (ucurr_isAvailable(isoCode, date1975,  date1970, &errorCode) == TRUE) {
1511        log_err("FAIL: Wrong range 1975-1970 for ISO code (%s) was not reported.\n", lastCode);
1512     } else if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
1513        log_data_err("FAIL: Error code not reported for wrong range 1975-1970 for ISO code (%s).\n", lastCode);
1514     }
1515 
1516     free(isoCode);
1517 #endif
1518 }
1519 
1520 #define TESTCASE(name) addTest(root, &name, "tsutil/cldrtest/" #name)
1521 
1522 void addCLDRTest(TestNode** root);
1523 
addCLDRTest(TestNode ** root)1524 void addCLDRTest(TestNode** root)
1525 {
1526 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
1527     TESTCASE(TestLocaleStructure);
1528     TESTCASE(TestCurrencyList);
1529 #endif
1530     TESTCASE(TestConsistentCountryInfo);
1531     TESTCASE(VerifyTranslation);
1532     TESTCASE(TestExemplarSet);
1533     TESTCASE(TestLocaleDisplayPattern);
1534     TESTCASE(TestCoverage);
1535     TESTCASE(TestIndexChars);
1536     TESTCASE(TestAvailableIsoCodes);
1537 }
1538 
1539