• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  udata.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999oct25
16 *   created by: Markus W. Scherer
17 */
18 
19 #include "unicode/utypes.h"  /* U_PLATFORM etc. */
20 
21 #ifdef __GNUC__
22 /* if gcc
23 #define ATTRIBUTE_WEAK __attribute__ ((weak))
24 might have to #include some other header
25 */
26 #endif
27 
28 #include "unicode/putil.h"
29 #include "unicode/udata.h"
30 #include "unicode/uversion.h"
31 #include "charstr.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "mutex.h"
35 #include "putilimp.h"
36 #include "restrace.h"
37 #include "uassert.h"
38 #include "ucln_cmn.h"
39 #include "ucmndata.h"
40 #include "udatamem.h"
41 #include "uhash.h"
42 #include "umapfile.h"
43 #include "umutex.h"
44 
45 /***********************************************************************
46 *
47 *   Notes on the organization of the ICU data implementation
48 *
49 *      All of the public API is defined in udata.h
50 *
51 *      The implementation is split into several files...
52 *
53 *         - udata.cpp  (this file) contains higher level code that knows about
54 *                     the search paths for locating data, caching opened data, etc.
55 *
56 *         - umapfile.c  contains the low level platform-specific code for actually loading
57 *                     (memory mapping, file reading, whatever) data into memory.
58 *
59 *         - ucmndata.c  deals with the tables of contents of ICU data items within
60 *                     an ICU common format data file.  The implementation includes
61 *                     an abstract interface and support for multiple TOC formats.
62 *                     All knowledge of any specific TOC format is encapsulated here.
63 *
64 *         - udatamem.c has code for managing UDataMemory structs.  These are little
65 *                     descriptor objects for blocks of memory holding ICU data of
66 *                     various types.
67 */
68 
69 /* configuration ---------------------------------------------------------- */
70 
71 /* If you are excruciatingly bored turn this on .. */
72 /* #define UDATA_DEBUG 1 */
73 
74 #if defined(UDATA_DEBUG)
75 #   include <stdio.h>
76 #endif
77 
78 U_NAMESPACE_USE
79 
80 /*
81  *  Forward declarations
82  */
83 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err);
84 
85 /***********************************************************************
86 *
87 *    static (Global) data
88 *
89 ************************************************************************/
90 
91 /*
92  * Pointers to the common ICU data.
93  *
94  * We store multiple pointers to ICU data packages and iterate through them
95  * when looking for a data item.
96  *
97  * It is possible to combine this with dependency inversion:
98  * One or more data package libraries may export
99  * functions that each return a pointer to their piece of the ICU data,
100  * and this file would import them as weak functions, without a
101  * strong linker dependency from the common library on the data library.
102  *
103  * Then we can have applications depend on only that part of ICU's data
104  * that they really need, reducing the size of binaries that take advantage
105  * of this.
106  */
107 static UDataMemory *gCommonICUDataArray[10] = { nullptr };   // Access protected by icu global mutex.
108 
109 static u_atomic_int32_t gHaveTriedToLoadCommonData {0};  //  See extendICUData().
110 
111 static UHashtable  *gCommonDataCache = nullptr;  /* Global hash table of opened ICU data files.  */
112 static icu::UInitOnce gCommonDataCacheInitOnce {};
113 
114 // Android-changed: On Android, use our patched version of openCommonData() to load the data,
115 //   and do not try to load ICU data from other files.
116 #if U_PLATFORM == U_PF_ANDROID
117 static UDataFileAccess  gDataFileAccess = UDATA_NO_FILES;
118 #elif !defined(ICU_DATA_DIR_WINDOWS)
119 static UDataFileAccess  gDataFileAccess = UDATA_DEFAULT_ACCESS;  // Access not synchronized.
120                                                                  // Modifying is documented as thread-unsafe.
121 #else
122 // If we are using the Windows data directory, then look in one spot only.
123 static UDataFileAccess  gDataFileAccess = UDATA_NO_FILES;
124 #endif
125 
126 // BEGIN Android-added: Include android/host-linux-specific headers and variables.
// ANDROID is defined when using the AOSP build system, e.g. Soong, but not the
// normal GNU make used by ./updateicudata.py.  AOSP_ICU_INIT is enabled only
// when additionally targeting Android or host linux.
#if defined(ANDROID) && (U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_LINUX)
  #define AOSP_ICU_INIT 1
#endif

#ifdef AOSP_ICU_INIT
  #include "androidicuinit/android_icu_init.h"
  // Guards the single call to android_icu_init(); reset in udata_cleanup().
  static icu::UInitOnce gAospInitOnce {};
#endif
137 // END Android-added: Include android/host-linux-specific headers and variables.
138 
139 
140 static UBool U_CALLCONV
udata_cleanup()141 udata_cleanup()
142 {
143     int32_t i;
144 
145     if (gCommonDataCache) {             /* Delete the cache of user data mappings.  */
146         uhash_close(gCommonDataCache);  /*   Table owns the contents, and will delete them. */
147         gCommonDataCache = nullptr;        /*   Cleanup is not thread safe.                */
148     }
149     gCommonDataCacheInitOnce.reset();
150 
151     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != nullptr; ++i) {
152         udata_close(gCommonICUDataArray[i]);
153         gCommonICUDataArray[i] = nullptr;
154     }
155     gHaveTriedToLoadCommonData = 0;
156 
157 // BEGIN Android-added: Use specialized libandroidicuinit to unload the data on Android/ART host.
158 #ifdef AOSP_ICU_INIT
159     android_icu_cleanup();
160     gAospInitOnce.reset();
161 #endif
162 // END Android-added: Use specialized libandroidicuinit to unload the data on Android/ART host.
163 
164     return true;                   /* Everything was cleaned up */
165 }
166 
167 static UBool U_CALLCONV
findCommonICUDataByName(const char * inBasename,UErrorCode & err)168 findCommonICUDataByName(const char *inBasename, UErrorCode &err)
169 {
170     UBool found = false;
171     int32_t i;
172 
173     UDataMemory  *pData = udata_findCachedData(inBasename, err);
174     if (U_FAILURE(err) || pData == nullptr)
175         return false;
176 
177     {
178         Mutex lock;
179         for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
180             if ((gCommonICUDataArray[i] != nullptr) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) {
181                 /* The data pointer is already in the array. */
182                 found = true;
183                 break;
184             }
185         }
186     }
187     return found;
188 }
189 
190 
191 /*
192  * setCommonICUData.   Set a UDataMemory to be the global ICU Data
193  */
194 static UBool
setCommonICUData(UDataMemory * pData,UBool warn,UErrorCode * pErr)195 setCommonICUData(UDataMemory *pData,     /*  The new common data.  Belongs to caller, we copy it. */
196                  UBool       warn,       /*  If true, set USING_DEFAULT warning if ICUData was    */
197                                          /*    changed by another thread before we got to it.     */
198                  UErrorCode *pErr)
199 {
200     UDataMemory  *newCommonData = UDataMemory_createNewInstance(pErr);
201     int32_t i;
202     UBool didUpdate = false;
203     if (U_FAILURE(*pErr)) {
204         return false;
205     }
206 
207     /*  For the assignment, other threads must cleanly see either the old            */
208     /*    or the new, not some partially initialized new.  The old can not be        */
209     /*    deleted - someone may still have a pointer to it lying around in           */
210     /*    their locals.                                                              */
211     UDatamemory_assign(newCommonData, pData);
212     umtx_lock(nullptr);
213     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
214         if (gCommonICUDataArray[i] == nullptr) {
215             gCommonICUDataArray[i] = newCommonData;
216             didUpdate = true;
217             break;
218         } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) {
219             /* The same data pointer is already in the array. */
220             break;
221         }
222     }
223     umtx_unlock(nullptr);
224 
225     if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) {
226         *pErr = U_USING_DEFAULT_WARNING;
227     }
228     if (didUpdate) {
229         ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
230     } else {
231         uprv_free(newCommonData);
232     }
233     return didUpdate;
234 }
235 
236 #if !defined(ICU_DATA_DIR_WINDOWS)
237 
238 static UBool
setCommonICUDataPointer(const void * pData,UBool,UErrorCode * pErrorCode)239 setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
240     UDataMemory tData;
241     UDataMemory_init(&tData);
242     UDataMemory_setData(&tData, pData);
243     udata_checkCommonData(&tData, pErrorCode);
244     return setCommonICUData(&tData, false, pErrorCode);
245 }
246 
247 #endif
248 
249 static const char *
findBasename(const char * path)250 findBasename(const char *path) {
251     const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
252     if(basename==nullptr) {
253         return path;
254     } else {
255         return basename+1;
256     }
257 }
258 
259 #ifdef UDATA_DEBUG
260 static const char *
packageNameFromPath(const char * path)261 packageNameFromPath(const char *path)
262 {
263     if((path == nullptr) || (*path == 0)) {
264         return U_ICUDATA_NAME;
265     }
266 
267     path = findBasename(path);
268 
269     if((path == nullptr) || (*path == 0)) {
270         return U_ICUDATA_NAME;
271     }
272 
273     return path;
274 }
275 #endif
276 
277 /*----------------------------------------------------------------------*
278  *                                                                      *
279  *   Cache for common data                                              *
280  *      Functions for looking up or adding entries to a cache of        *
281  *      data that has been previously opened.  Avoids a potentially     *
282  *      expensive operation of re-opening the data for subsequent       *
283  *      uses.                                                           *
284  *                                                                      *
285  *      Data remains cached for the duration of the process.            *
286  *                                                                      *
287  *----------------------------------------------------------------------*/
288 
289 typedef struct DataCacheElement {
290     char          *name;
291     UDataMemory   *item;
292 } DataCacheElement;
293 
294 
295 
296 /*
297  * Deleter function for DataCacheElements.
298  *         udata cleanup function closes the hash table; hash table in turn calls back to
299  *         here for each entry.
300  */
DataCacheElement_deleter(void * pDCEl)301 static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
302     DataCacheElement* p = static_cast<DataCacheElement*>(pDCEl);
303     udata_close(p->item);              /* unmaps storage */
304     uprv_free(p->name);                /* delete the hash key string. */
305     uprv_free(pDCEl);                  /* delete 'this'          */
306 }
307 
udata_initHashTable(UErrorCode & err)308 static void U_CALLCONV udata_initHashTable(UErrorCode &err) {
309     U_ASSERT(gCommonDataCache == nullptr);
310     gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &err);
311     if (U_FAILURE(err)) {
312        return;
313     }
314     U_ASSERT(gCommonDataCache != nullptr);
315     uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
316     ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
317 }
318 
319  /*   udata_getCacheHashTable()
320   *     Get the hash table used to store the data cache entries.
321   *     Lazy create it if it doesn't yet exist.
322   */
udata_getHashTable(UErrorCode & err)323 static UHashtable *udata_getHashTable(UErrorCode &err) {
324     umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err);
325     return gCommonDataCache;
326 }
327 
328 
329 
udata_findCachedData(const char * path,UErrorCode & err)330 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err)
331 {
332     UHashtable        *htable;
333     UDataMemory       *retVal = nullptr;
334     DataCacheElement  *el;
335     const char        *baseName;
336 
337     htable = udata_getHashTable(err);
338     if (U_FAILURE(err)) {
339         return nullptr;
340     }
341 
342     baseName = findBasename(path);   /* Cache remembers only the base name, not the full path. */
343     umtx_lock(nullptr);
344     el = static_cast<DataCacheElement*>(uhash_get(htable, baseName));
345     umtx_unlock(nullptr);
346     if (el != nullptr) {
347         retVal = el->item;
348     }
349 #ifdef UDATA_DEBUG
350     fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal);
351 #endif
352     return retVal;
353 }
354 
355 
udata_cacheDataItem(const char * path,UDataMemory * item,UErrorCode * pErr)356 static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
357     DataCacheElement *newElement;
358     const char       *baseName;
359     int32_t           nameLen;
360     UHashtable       *htable;
361     DataCacheElement *oldValue = nullptr;
362     UErrorCode        subErr = U_ZERO_ERROR;
363 
364     htable = udata_getHashTable(*pErr);
365     if (U_FAILURE(*pErr)) {
366         return nullptr;
367     }
368 
369     /* Create a new DataCacheElement - the thingy we store in the hash table -
370      * and copy the supplied path and UDataMemoryItems into it.
371      */
372     newElement = static_cast<DataCacheElement*>(uprv_malloc(sizeof(DataCacheElement)));
373     if (newElement == nullptr) {
374         *pErr = U_MEMORY_ALLOCATION_ERROR;
375         return nullptr;
376     }
377     newElement->item = UDataMemory_createNewInstance(pErr);
378     if (U_FAILURE(*pErr)) {
379         uprv_free(newElement);
380         return nullptr;
381     }
382     UDatamemory_assign(newElement->item, item);
383 
384     baseName = findBasename(path);
385     nameLen = static_cast<int32_t>(uprv_strlen(baseName));
386     newElement->name = static_cast<char*>(uprv_malloc(nameLen + 1));
387     if (newElement->name == nullptr) {
388         *pErr = U_MEMORY_ALLOCATION_ERROR;
389         uprv_free(newElement->item);
390         uprv_free(newElement);
391         return nullptr;
392     }
393     uprv_strcpy(newElement->name, baseName);
394 
395     /* Stick the new DataCacheElement into the hash table.
396     */
397     umtx_lock(nullptr);
398     oldValue = static_cast<DataCacheElement*>(uhash_get(htable, path));
399     if (oldValue != nullptr) {
400         subErr = U_USING_DEFAULT_WARNING;
401     }
402     else {
403         uhash_put(
404             htable,
405             newElement->name,               /* Key   */
406             newElement,                     /* Value */
407             &subErr);
408     }
409     umtx_unlock(nullptr);
410 
411 #ifdef UDATA_DEBUG
412     fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
413     (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs);
414 #endif
415 
416     if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
417         *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
418         uprv_free(newElement->name);
419         uprv_free(newElement->item);
420         uprv_free(newElement);
421         return oldValue ? oldValue->item : nullptr;
422     }
423 
424     return newElement->item;
425 }
426 
427 /*----------------------------------------------------------------------*==============
428  *                                                                      *
429  *  Path management.  Could be shared with other tools/etc if need be   *
430  * later on.                                                            *
431  *                                                                      *
432  *----------------------------------------------------------------------*/
433 
434 U_NAMESPACE_BEGIN
435 
436 class UDataPathIterator
437 {
438 public:
439     UDataPathIterator(const char *path, const char *pkg,
440                       const char *item, const char *suffix, UBool doCheckLastFour,
441                       UErrorCode *pErrorCode);
442     const char *next(UErrorCode *pErrorCode);
443 
444 private:
445     const char *path;                              /* working path (u_icudata_Dir) */
446     const char *nextPath;                          /* path following this one */
447     const char *basename;                          /* item's basename (icudt22e_mt.res)*/
448 
449     StringPiece suffix;                            /* item suffix (can be null) */
450 
451     uint32_t    basenameLen;                       /* length of basename */
452 
453     CharString  itemPath;                          /* path passed in with item name */
454     CharString  pathBuffer;                        /* output path for this it'ion */
455     CharString  packageStub;                       /* example:  "/icudt28b". Will ignore that leaf in set paths. */
456 
457     UBool       checkLastFour;                     /* if true then allow paths such as '/foo/myapp.dat'
458                                                     * to match, checks last 4 chars of suffix with
459                                                     * last 4 of path, then previous chars. */
460 };
461 
462 /**
463  * @param iter    The iterator to be initialized. Its current state does not matter.
464  * @param inPath  The full pathname to be iterated over.  If nullptr, defaults to U_ICUDATA_NAME
465  * @param pkg     Package which is being searched for, ex "icudt28l".  Will ignore leaf directories such as /icudt28l
466  * @param item    Item to be searched for.  Can include full path, such as /a/b/foo.dat
467  * @param inSuffix  Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
468  *             Ex:   'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
469  *                   '/blarg/stuff.dat' would also be found.
470  *  Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
471  *        the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
472  */
UDataPathIterator(const char * inPath,const char * pkg,const char * item,const char * inSuffix,UBool doCheckLastFour,UErrorCode * pErrorCode)473 UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
474                                      const char *item, const char *inSuffix, UBool doCheckLastFour,
475                                      UErrorCode *pErrorCode)
476 {
477 #ifdef UDATA_DEBUG
478         fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
479 #endif
480     /** Path **/
481     if(inPath == nullptr) {
482         path = u_getDataDirectory();
483     } else {
484         path = inPath;
485     }
486 
487     /** Package **/
488     if(pkg != nullptr) {
489       packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode);
490 #ifdef UDATA_DEBUG
491       fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
492 #endif
493     }
494 
495     /** Item **/
496     basename = findBasename(item);
497     basenameLen = static_cast<int32_t>(uprv_strlen(basename));
498 
499     /** Item path **/
500     if(basename == item) {
501         nextPath = path;
502     } else {
503         itemPath.append(item, static_cast<int32_t>(basename - item), *pErrorCode);
504         nextPath = itemPath.data();
505     }
506 #ifdef UDATA_DEBUG
507     fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix);
508 #endif
509 
510     /** Suffix  **/
511     if(inSuffix != nullptr) {
512         suffix = inSuffix;
513     } else {
514         suffix = "";
515     }
516 
517     checkLastFour = doCheckLastFour;
518 
519     /* pathBuffer will hold the output path strings returned by this iterator */
520 
521 #ifdef UDATA_DEBUG
522     fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
523             item,
524             path,
525             basename,
526             suffix.data(),
527             itemPath.data(),
528             nextPath,
529             checkLastFour?"true":"false");
530 #endif
531 }
532 
533 /**
534  * Get the next path on the list.
535  *
536  * @param iter The Iter to be used
537  * @param len  If set, pointer to the length of the returned path, for convenience.
538  * @return Pointer to the next path segment, or nullptr if there are no more.
539  */
next(UErrorCode * pErrorCode)540 const char *UDataPathIterator::next(UErrorCode *pErrorCode)
541 {
542     if(U_FAILURE(*pErrorCode)) {
543         return nullptr;
544     }
545 
546     const char *currentPath = nullptr;
547     int32_t     pathLen = 0;
548     const char *pathBasename;
549 
550     do
551     {
552         if( nextPath == nullptr ) {
553             break;
554         }
555         currentPath = nextPath;
556 
557         if(nextPath == itemPath.data()) { /* we were processing item's path. */
558             nextPath = path; /* start with regular path next tm. */
559             pathLen = static_cast<int32_t>(uprv_strlen(currentPath));
560         } else {
561             /* fix up next for next time */
562             nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
563             if(nextPath == nullptr) {
564                 /* segment: entire path */
565                 pathLen = static_cast<int32_t>(uprv_strlen(currentPath));
566             } else {
567                 /* segment: until next segment */
568                 pathLen = static_cast<int32_t>(nextPath - currentPath);
569                 /* skip divider */
570                 nextPath ++;
571             }
572         }
573 
574         if(pathLen == 0) {
575             continue;
576         }
577 
578 #ifdef UDATA_DEBUG
579         fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
580         fprintf(stderr, "                     ");
581         {
582             int32_t qqq;
583             for(qqq=0;qqq<pathLen;qqq++)
584             {
585                 fprintf(stderr, " ");
586             }
587 
588             fprintf(stderr, "^\n");
589         }
590 #endif
591         pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
592 
593         /* check for .dat files */
594         pathBasename = findBasename(pathBuffer.data());
595 
596         if(checkLastFour &&
597            (pathLen>=4) &&
598            uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
599            uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0  && /* base matches */
600            uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
601 
602 #ifdef UDATA_DEBUG
603             fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data());
604 #endif
605             /* do nothing */
606         }
607         else
608         {       /* regular dir path */
609             if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
610                 if((pathLen>=4) &&
611                    uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
612                 {
613 #ifdef UDATA_DEBUG
614                     fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
615 #endif
616                     continue;
617                 }
618 
619                 /* Check if it is a directory with the same name as our package */
620                 if(!packageStub.isEmpty() &&
621                    (pathLen > packageStub.length()) &&
622                    !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
623 #ifdef UDATA_DEBUG
624                   fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
625 #endif
626                   pathBuffer.truncate(pathLen - packageStub.length());
627                 }
628                 pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
629             }
630 
631             /* + basename */
632             pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
633 
634             if (!suffix.empty())  /* tack on suffix */
635             {
636                 if (suffix.length() > 4) {
637                     // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
638                     // then we need to ensure that the path ends with a separator.
639                     pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
640                 }
641                 pathBuffer.append(suffix, *pErrorCode);
642             }
643         }
644 
645 #ifdef UDATA_DEBUG
646         fprintf(stderr, " -->  %s\n", pathBuffer.data());
647 #endif
648 
649         return pathBuffer.data();
650 
651     } while(path);
652 
653     /* fell way off the end */
654     return nullptr;
655 }
656 
657 U_NAMESPACE_END
658 
659 /* ==================================================================================*/
660 
661 
662 /*----------------------------------------------------------------------*
663  *                                                                      *
664  *  Add a static reference to the common data library                   *
665  *   Unless overridden by an explicit udata_setCommonData, this will be *
666  *      our common data.                                                *
667  *                                                                      *
668  *----------------------------------------------------------------------*/
669 #if !defined(ICU_DATA_DIR_WINDOWS)
670 // When using the Windows system data, we expect only a single data file.
671 extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
672 #endif
673 
674 /*
675  * This would be a good place for weak-linkage declarations of
676  * partial-data-library access functions where each returns a pointer
677  * to its data package, if it is linked in.
678  */
679 /*
680 extern const void *uprv_getICUData_collation() ATTRIBUTE_WEAK;
681 extern const void *uprv_getICUData_conversion() ATTRIBUTE_WEAK;
682 */
683 
684 /*----------------------------------------------------------------------*
685  *                                                                      *
686  *   openCommonData   Attempt to open a common format (.dat) file       *
687  *                    Map it into memory (if it's not there already)    *
688  *                    and return a UDataMemory object for it.           *
689  *                                                                      *
690  *                    If the requested data is already open and cached  *
691  *                       just return the cached UDataMem object.        *
692  *                                                                      *
693  *----------------------------------------------------------------------*/
694 static UDataMemory *
openCommonData(const char * path,int32_t commonDataIndex,UErrorCode * pErrorCode)695 openCommonData(const char *path,          /*  Path from OpenChoice?          */
696                int32_t commonDataIndex,   /*  ICU Data (index >= 0) if path == nullptr */
697                UErrorCode *pErrorCode)
698 {
699     UDataMemory tData;
700     const char *pathBuffer;
701     const char *inBasename;
702 
703     if (U_FAILURE(*pErrorCode)) {
704         return nullptr;
705     }
706 
707     UDataMemory_init(&tData);
708 
709     /* ??????? TODO revisit this */
710     if (commonDataIndex >= 0) {
711         /* "mini-cache" for common ICU data */
712         if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) {
713             return nullptr;
714         }
715         {
716             Mutex lock;
717             if(gCommonICUDataArray[commonDataIndex] != nullptr) {
718                 return gCommonICUDataArray[commonDataIndex];
719             }
720 #if !defined(ICU_DATA_DIR_WINDOWS)
721 // When using the Windows system data, we expect only a single data file.
722             int32_t i;
723             for(i = 0; i < commonDataIndex; ++i) {
724                 if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) {
725                     /* The linked-in data is already in the list. */
726                     return nullptr;
727                 }
728             }
729 #endif
730         }
731 
732 // BEGIN Android-added: Use specialized libandroidicuinit to load the data on Android/ART host.
733 #ifdef AOSP_ICU_INIT // Do nothing on other platforms, e.g. Windows
734         // android_icu_init() is only called once.
735         umtx_initOnce(gAospInitOnce, &android_icu_init);
736 #endif // AOSP_ICU_INIT
737 // END Android-added: Use specialized libandroidicuinit to load the data on Android/ART host.
738 
739         /* Add the linked-in data to the list. */
740         /*
741          * This is where we would check and call weakly linked partial-data-library
742          * access functions.
743          */
744         /*
745         if (uprv_getICUData_collation) {
746             setCommonICUDataPointer(uprv_getICUData_collation(), false, pErrorCode);
747         }
748         if (uprv_getICUData_conversion) {
749             setCommonICUDataPointer(uprv_getICUData_conversion(), false, pErrorCode);
750         }
751         */
752 #if !defined(ICU_DATA_DIR_WINDOWS)
753 // When using the Windows system data, we expect only a single data file.
754         setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, false, pErrorCode);
755         {
756             Mutex lock;
757             return gCommonICUDataArray[commonDataIndex];
758         }
759 #endif
760     }
761 
762 
763     /* request is NOT for ICU Data.  */
764 
765     /* Find the base name portion of the supplied path.   */
766     /*   inBasename will be left pointing somewhere within the original path string.      */
767     inBasename = findBasename(path);
768 #ifdef UDATA_DEBUG
769     fprintf(stderr, "inBasename = %s\n", inBasename);
770 #endif
771 
772     if(*inBasename==0) {
773         /* no basename.     This will happen if the original path was a directory name,   */
774         /*    like  "a/b/c/".   (Fallback to separate files will still work.)             */
775 #ifdef UDATA_DEBUG
776         fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
777 #endif
778         if (U_SUCCESS(*pErrorCode)) {
779             *pErrorCode=U_FILE_ACCESS_ERROR;
780         }
781         return nullptr;
782     }
783 
784    /* Is the requested common data file already open and cached?                     */
785    /*   Note that the cache is keyed by the base name only.  The rest of the path,   */
786    /*     if any, is not considered.                                                 */
787     UDataMemory  *dataToReturn = udata_findCachedData(inBasename, *pErrorCode);
788     if (dataToReturn != nullptr || U_FAILURE(*pErrorCode)) {
789         return dataToReturn;
790     }
791 
792     /* Requested item is not in the cache.
793      * Hunt it down, trying all the path locations
794      */
795 
796     UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", true, pErrorCode);
797 
798     while ((UDataMemory_isLoaded(&tData)==false) && (pathBuffer = iter.next(pErrorCode)) != nullptr)
799     {
800 #ifdef UDATA_DEBUG
801         fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
802 #endif
803         uprv_mapFile(&tData, pathBuffer, pErrorCode);
804 #ifdef UDATA_DEBUG
805         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
806 #endif
807     }
808     if (U_FAILURE(*pErrorCode)) {
809         return nullptr;
810     }
811 
812     if (U_FAILURE(*pErrorCode)) {
813         return nullptr;
814     }
815     if (!UDataMemory_isLoaded(&tData)) {
816         /* no common data */
817         *pErrorCode=U_FILE_ACCESS_ERROR;
818         return nullptr;
819     }
820 
821     /* we have mapped a file, check its header */
822     udata_checkCommonData(&tData, pErrorCode);
823 
824 
825     /* Cache the UDataMemory struct for this .dat file,
826      *   so we won't need to hunt it down and map it again next time
827      *   something is needed from it.                */
828     return udata_cacheDataItem(inBasename, &tData, pErrorCode);
829 }
830 
831 
832 /*----------------------------------------------------------------------*
833  *                                                                      *
834  *   extendICUData   If the full set of ICU data was not loaded at      *
835  *                   program startup, load it now.  This function will  *
836  *                   be called when the lookup of an ICU data item in   *
837  *                   the common ICU data fails.                         *
838  *                                                                      *
839  *                   return true if new data is loaded, false otherwise.*
840  *                                                                      *
841  *----------------------------------------------------------------------*/
extendICUData(UErrorCode * pErr)842 static UBool extendICUData(UErrorCode *pErr)
843 {
844     UDataMemory   *pData;
845     UDataMemory   copyPData;
846     UBool         didUpdate = false;
847 
848     /*
849      * There is a chance for a race condition here.
850      * Normally, ICU data is loaded from a DLL or via mmap() and
851      * setCommonICUData() will detect if the same address is set twice.
852      * If ICU is built with data loading via fread() then the address will
853      * be different each time the common data is loaded and we may add
854      * multiple copies of the data.
855      * In this case, use a mutex to prevent the race.
856      * Use a specific mutex to avoid nested locks of the global mutex.
857      */
858 #if MAP_IMPLEMENTATION==MAP_STDIO
859     static UMutex extendICUDataMutex;
860     umtx_lock(&extendICUDataMutex);
861 #endif
862     if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
863         /* See if we can explicitly open a .dat file for the ICUData. */
864         pData = openCommonData(
865                    U_ICUDATA_NAME,            /*  "icudt20l" , for example.          */
866                    -1,                        /*  Pretend we're not opening ICUData  */
867                    pErr);
868 
869         /* How about if there is no pData, eh... */
870 
871        UDataMemory_init(&copyPData);
872        if(pData != nullptr) {
873           UDatamemory_assign(&copyPData, pData);
874           copyPData.map = nullptr;     /* The mapping for this data is owned by the hash table */
875           copyPData.mapAddr = nullptr; /*   which will unmap it when ICU is shut down.         */
876                                        /* CommonICUData is also unmapped when ICU is shut down.*/
877                                        /* To avoid unmapping the data twice, zero out the map  */
878                                        /*   fields in the UDataMemory that we're assigning     */
879                                        /*   to CommonICUData.                                  */
880 
881           didUpdate = /* no longer using this result */
882               setCommonICUData(&copyPData,/*  The new common data.                                */
883                        false,             /*  No warnings if write didn't happen                  */
884                        pErr);             /*  setCommonICUData honors errors; NOP if error set    */
885         }
886 
887         umtx_storeRelease(gHaveTriedToLoadCommonData, 1);
888     }
889 
890     didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr);  /* Return 'true' when a racing writes out the extended                 */
891                                                           /* data after another thread has failed to see it (in openCommonData), so     */
892                                                           /* extended data can be examined.                                             */
893                                                           /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
894 
895 #if MAP_IMPLEMENTATION==MAP_STDIO
896     umtx_unlock(&extendICUDataMutex);
897 #endif
898     return didUpdate;               /* Return true if ICUData pointer was updated.   */
899                                     /*   (Could potentially have been done by another thread racing */
900                                     /*   us through here, but that's fine, we still return true    */
901                                     /*   so that current thread will also examine extended data.   */
902 }
903 
904 /*----------------------------------------------------------------------*
905  *                                                                      *
906  *   udata_setCommonData                                                *
907  *                                                                      *
908  *----------------------------------------------------------------------*/
909 U_CAPI void U_EXPORT2
udata_setCommonData(const void * data,UErrorCode * pErrorCode)910 udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
911     UDataMemory dataMemory;
912 
913     if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
914         return;
915     }
916 
917     if(data==nullptr) {
918         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
919         return;
920     }
921 
922     /* set the data pointer and test for validity */
923     UDataMemory_init(&dataMemory);
924     UDataMemory_setData(&dataMemory, data);
925     udata_checkCommonData(&dataMemory, pErrorCode);
926     if (U_FAILURE(*pErrorCode)) {return;}
927 
928     /* we have good data */
929     /* Set it up as the ICU Common Data.  */
930     setCommonICUData(&dataMemory, true, pErrorCode);
931 }
932 
933 /*---------------------------------------------------------------------------
934  *
935  *  udata_setAppData
936  *
937  *---------------------------------------------------------------------------- */
938 U_CAPI void U_EXPORT2
udata_setAppData(const char * path,const void * data,UErrorCode * err)939 udata_setAppData(const char *path, const void *data, UErrorCode *err)
940 {
941     UDataMemory     udm;
942 
943     if(err==nullptr || U_FAILURE(*err)) {
944         return;
945     }
946     if(data==nullptr) {
947         *err=U_ILLEGAL_ARGUMENT_ERROR;
948         return;
949     }
950 
951     UDataMemory_init(&udm);
952     UDataMemory_setData(&udm, data);
953     udata_checkCommonData(&udm, err);
954     udata_cacheDataItem(path, &udm, err);
955 }
956 
957 /*----------------------------------------------------------------------------*
958  *                                                                            *
959  *  checkDataItem     Given a freshly located/loaded data item, either        *
960  *                    an entry in a common file or a separately loaded file,  *
961  *                    sanity check its header, and see if the data is         *
962  *                    acceptable to the app.                                  *
963  *                    If the data is good, create and return a UDataMemory    *
964  *                    object that can be returned to the application.         *
965  *                    Return nullptr on any sort of failure.                     *
966  *                                                                            *
967  *----------------------------------------------------------------------------*/
968 static UDataMemory *
checkDataItem(const DataHeader * pHeader,UDataMemoryIsAcceptable * isAcceptable,void * context,const char * type,const char * name,UErrorCode * nonFatalErr,UErrorCode * fatalErr)969 checkDataItem
970 (
971  const DataHeader         *pHeader,         /* The data item to be checked.                */
972  UDataMemoryIsAcceptable  *isAcceptable,    /* App's call-back function                    */
973  void                     *context,         /*   pass-thru param for above.                */
974  const char               *type,            /*   pass-thru param for above.                */
975  const char               *name,            /*   pass-thru param for above.                */
976  UErrorCode               *nonFatalErr,     /* Error code if this data was not acceptable  */
977                                             /*   but openChoice should continue with       */
978                                             /*   trying to get data from fallback path.    */
979  UErrorCode               *fatalErr         /* Bad error, caller should return immediately */
980  )
981 {
982     UDataMemory  *rDataMem = nullptr;          /* the new UDataMemory, to be returned.        */
983 
984     if (U_FAILURE(*fatalErr)) {
985         return nullptr;
986     }
987 
988     if(pHeader->dataHeader.magic1==0xda &&
989         pHeader->dataHeader.magic2==0x27 &&
990         (isAcceptable==nullptr || isAcceptable(context, type, name, &pHeader->info))
991     ) {
992         rDataMem=UDataMemory_createNewInstance(fatalErr);
993         if (U_FAILURE(*fatalErr)) {
994             return nullptr;
995         }
996         rDataMem->pHeader = pHeader;
997     } else {
998         /* the data is not acceptable, look further */
999         /* If we eventually find something good, this errorcode will be */
1000         /*    cleared out.                                              */
1001         *nonFatalErr=U_INVALID_FORMAT_ERROR;
1002     }
1003     return rDataMem;
1004 }
1005 
1006 /**
1007  * @return 0 if not loaded, 1 if loaded or err
1008  */
doLoadFromIndividualFiles(const char * pkgName,const char * dataPath,const char * tocEntryPathSuffix,const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * subErrorCode,UErrorCode * pErrorCode)1009 static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
1010         const char *dataPath, const char *tocEntryPathSuffix,
1011             /* following arguments are the same as doOpenChoice itself */
1012             const char *path, const char *type, const char *name,
1013              UDataMemoryIsAcceptable *isAcceptable, void *context,
1014              UErrorCode *subErrorCode,
1015              UErrorCode *pErrorCode)
1016 {
1017     const char         *pathBuffer;
1018     UDataMemory         dataMemory;
1019     UDataMemory *pEntryData;
1020 
1021     /* look in ind. files: package\nam.typ  ========================= */
1022     /* init path iterator for individual files */
1023     UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, false, pErrorCode);
1024 
1025     while ((pathBuffer = iter.next(pErrorCode)) != nullptr)
1026     {
1027 #ifdef UDATA_DEBUG
1028         fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
1029 #endif
1030         if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
1031         {
1032             pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
1033             if (pEntryData != nullptr) {
1034                 /* Data is good.
1035                 *  Hand off ownership of the backing memory to the user's UDataMemory.
1036                 *  and return it.   */
1037                 pEntryData->mapAddr = dataMemory.mapAddr;
1038                 pEntryData->map     = dataMemory.map;
1039 
1040 #ifdef UDATA_DEBUG
1041                 fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
1042 #endif
1043                 return pEntryData;
1044             }
1045 
1046             /* the data is not acceptable, or some error occurred.  Either way, unmap the memory */
1047             udata_close(&dataMemory);
1048 
1049             /* If we had a nasty error, bail out completely.  */
1050             if (U_FAILURE(*pErrorCode)) {
1051                 return nullptr;
1052             }
1053 
1054             /* Otherwise remember that we found data but didn't like it for some reason  */
1055             *subErrorCode=U_INVALID_FORMAT_ERROR;
1056         }
1057 #ifdef UDATA_DEBUG
1058         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
1059 #endif
1060     }
1061     return nullptr;
1062 }
1063 
1064 /**
1065  * @return 0 if not loaded, 1 if loaded or err
1066  */
doLoadFromCommonData(UBool isICUData,const char *,const char *,const char *,const char * tocEntryName,const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * subErrorCode,UErrorCode * pErrorCode)1067 static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
1068         const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
1069             /* following arguments are the same as doOpenChoice itself */
1070             const char *path, const char *type, const char *name,
1071              UDataMemoryIsAcceptable *isAcceptable, void *context,
1072              UErrorCode *subErrorCode,
1073              UErrorCode *pErrorCode)
1074 {
1075     UDataMemory        *pEntryData;
1076     const DataHeader   *pHeader;
1077     UDataMemory        *pCommonData;
1078     int32_t            commonDataIndex;
1079     UBool              checkedExtendedICUData = false;
1080     /* try to get common data.  The loop is for platforms such as the 390 that do
1081      *  not initially load the full set of ICU data.  If the lookup of an ICU data item
1082      *  fails, the full (but slower to load) set is loaded, the and the loop repeats,
1083      *  trying the lookup again.  Once the full set of ICU data is loaded, the loop wont
1084      *  repeat because the full set will be checked the first time through.
1085      *
1086      *  The loop also handles the fallback to a .dat file if the application linked
1087      *   to the stub data library rather than a real library.
1088      */
1089     for (commonDataIndex = isICUData ? 0 : -1;;) {
1090         pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/
1091 
1092         if(U_SUCCESS(*subErrorCode) && pCommonData!=nullptr) {
1093             int32_t length;
1094 
1095             /* look up the data piece in the common data */
1096             pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
1097 #ifdef UDATA_DEBUG
1098             fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode));
1099 #endif
1100 
1101             if(pHeader!=nullptr) {
1102                 pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
1103 #ifdef UDATA_DEBUG
1104                 fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData);
1105 #endif
1106                 if (U_FAILURE(*pErrorCode)) {
1107                     return nullptr;
1108                 }
1109                 if (pEntryData != nullptr) {
1110                     pEntryData->length = length;
1111                     return pEntryData;
1112                 }
1113             }
1114         }
1115         // If we failed due to being out-of-memory, then stop early and report the error.
1116         if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
1117             *pErrorCode = *subErrorCode;
1118             return nullptr;
1119         }
1120         /* Data wasn't found.  If we were looking for an ICUData item and there is
1121          * more data available, load it and try again,
1122          * otherwise break out of this loop. */
1123         if (!isICUData) {
1124             return nullptr;
1125         } else if (pCommonData != nullptr) {
1126             ++commonDataIndex;  /* try the next data package */
1127         } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
1128             checkedExtendedICUData = true;
1129             /* try this data package slot again: it changed from nullptr to non-nullptr */
1130         } else {
1131             return nullptr;
1132         }
1133     }
1134 }
1135 
1136 /*
1137  * Identify the Time Zone resources that are subject to special override data loading.
1138  */
isTimeZoneFile(const char * name,const char * type)1139 static UBool isTimeZoneFile(const char *name, const char *type) {
1140     return ((uprv_strcmp(type, "res") == 0) &&
1141             (uprv_strcmp(name, "zoneinfo64") == 0 ||
1142              uprv_strcmp(name, "timezoneTypes") == 0 ||
1143              uprv_strcmp(name, "windowsZones") == 0 ||
1144              uprv_strcmp(name, "metaZones") == 0));
1145 }
1146 
/*
 *  A note on the ownership of Mapped Memory
 *
 *  For common format files, ownership resides with the UDataMemory object
 *    that lives in the cache of opened common data.  These UDataMemorys are private
 *    to the udata implementation, and are never seen directly by users.
 *
 *    The UDataMemory objects returned to users will have the address of some desired
 *    data within the mapped region, but they won't have the mapping info itself, and thus
 *    won't cause anything to be removed from memory when they are closed.
 *
 *  For individual data files, the UDataMemory returned to the user holds the
 *  information necessary to unmap the data on close.  If the user independently
 *  opens the same data file twice, two completely independent mappings will be made.
 *  (There is no cache of opened data items from individual files, only a cache of
 *   opened Common Data files, that is, files containing a collection of data items.)
 *
 *  For common data passed in from the user via udata_setAppData() or
 *  udata_setCommonData(), ownership remains with the user.
 *
 *  UDataMemory objects themselves, as opposed to the memory they describe,
 *  can be anywhere - heap, stack/local or global.
 *  They have a flag to indicate when they're heap allocated and thus
 *  must be deleted when closed.
 */
1172 
1173 
1174 /*----------------------------------------------------------------------------*
1175  *                                                                            *
1176  * main data loading functions                                                *
1177  *                                                                            *
1178  *----------------------------------------------------------------------------*/
1179 static UDataMemory *
doOpenChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1180 doOpenChoice(const char *path, const char *type, const char *name,
1181              UDataMemoryIsAcceptable *isAcceptable, void *context,
1182              UErrorCode *pErrorCode)
1183 {
1184     UDataMemory         *retVal = nullptr;
1185 
1186     const char         *dataPath;
1187 
1188     int32_t             tocEntrySuffixIndex;
1189     const char         *tocEntryPathSuffix;
1190     UErrorCode          subErrorCode=U_ZERO_ERROR;
1191     const char         *treeChar;
1192 
1193     UBool               isICUData = false;
1194 
1195 
1196     FileTracer::traceOpen(path, type, name);
1197 
1198 
1199     /* Is this path ICU data? */
1200     if(path == nullptr ||
1201        !strcmp(path, U_ICUDATA_ALIAS) ||  /* "ICUDATA" */
1202        !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
1203                      uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
1204        !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
1205                      uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
1206       isICUData = true;
1207     }
1208 
1209 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)  /* Windows:  try "foo\bar" and "foo/bar" */
1210     /* remap from alternate path char to the main one */
1211     CharString altSepPath;
1212     if(path) {
1213         if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != nullptr) {
1214             altSepPath.append(path, *pErrorCode);
1215             char *p;
1216             while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != nullptr) {
1217                 *p = U_FILE_SEP_CHAR;
1218             }
1219 #if defined (UDATA_DEBUG)
1220             fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.data());
1221 #endif
1222             path = altSepPath.data();
1223         }
1224     }
1225 #endif
1226 
1227     CharString tocEntryName; /* entry name in tree format. ex:  'icudt28b/coll/ar.res' */
1228     CharString tocEntryPath; /* entry name in path format. ex:  'icudt28b\\coll\\ar.res' */
1229 
1230     CharString pkgName;
1231     CharString treeName;
1232 
1233     /* ======= Set up strings */
1234     if(path==nullptr) {
1235         pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1236     } else {
1237         const char *pkg;
1238         const char *first;
1239         pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
1240         first = uprv_strchr(path, U_FILE_SEP_CHAR);
1241         if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
1242             /* see if this is an /absolute/path/to/package  path */
1243             if(pkg) {
1244                 pkgName.append(pkg+1, *pErrorCode);
1245             } else {
1246                 pkgName.append(path, *pErrorCode);
1247             }
1248         } else {
1249             treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
1250             if(treeChar) {
1251                 treeName.append(treeChar+1, *pErrorCode); /* following '-' */
1252                 if(isICUData) {
1253                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1254                 } else {
1255                     pkgName.append(path, static_cast<int32_t>(treeChar - path), *pErrorCode);
1256                     if (first == nullptr) {
1257                         /*
1258                         This user data has no path, but there is a tree name.
1259                         Look up the correct path from the data cache later.
1260                         */
1261                         path = pkgName.data();
1262                     }
1263                 }
1264             } else {
1265                 if(isICUData) {
1266                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1267                 } else {
1268                     pkgName.append(path, *pErrorCode);
1269                 }
1270             }
1271         }
1272     }
1273 
1274 #ifdef UDATA_DEBUG
1275     fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
1276 #endif
1277 
1278     /* setting up the entry name and file name
1279      * Make up a full name by appending the type to the supplied
1280      *  name, assuming that a type was supplied.
1281      */
1282 
1283     /* prepend the package */
1284     tocEntryName.append(pkgName, *pErrorCode);
1285     tocEntryPath.append(pkgName, *pErrorCode);
1286     tocEntrySuffixIndex = tocEntryName.length();
1287 
1288     if(!treeName.isEmpty()) {
1289         tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1290         tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1291     }
1292 
1293     tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1294     tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1295     if(type!=nullptr && *type!=0) {
1296         tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
1297         tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
1298     }
1299     // The +1 is for the U_FILE_SEP_CHAR that is always appended above.
1300     tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
1301 
1302 #ifdef UDATA_DEBUG
1303     fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
1304     fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
1305 #endif
1306 
1307 #if !defined(ICU_DATA_DIR_WINDOWS)
1308     if(path == nullptr) {
1309         path = COMMON_DATA_NAME; /* "icudt26e" */
1310     }
1311 #else
1312     // When using the Windows system data, we expects only a single data file.
1313     path = COMMON_DATA_NAME; /* "icudt26e" */
1314 #endif
1315 
1316     /************************ Begin loop looking for ind. files ***************/
1317 #ifdef UDATA_DEBUG
1318     fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
1319 #endif
1320 
1321     /* End of dealing with a null basename */
1322     dataPath = u_getDataDirectory();
1323 
1324     /****    Time zone individual files override  */
1325     if (isICUData && isTimeZoneFile(name, type)) {
1326         const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode);
1327         if (tzFilesDir[0] != 0) {
1328 #ifdef UDATA_DEBUG
1329             fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir);
1330 #endif
1331             retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix,
1332                             /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1333             if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
1334                 return retVal;
1335             }
1336         }
1337     }
1338 
1339     /****    COMMON PACKAGE  - only if packages are first. */
1340     if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
1341 #ifdef UDATA_DEBUG
1342         fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
1343 #endif
1344         /* #2 */
1345         retVal = doLoadFromCommonData(isICUData,
1346                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1347                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1348         if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
1349             return retVal;
1350         }
1351     }
1352 
1353     /****    INDIVIDUAL FILES  */
1354     if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
1355        (gDataFileAccess==UDATA_FILES_FIRST)) {
1356 #ifdef UDATA_DEBUG
1357         fprintf(stderr, "Trying individual files\n");
1358 #endif
1359         /* Check to make sure that there is a dataPath to iterate over */
1360         if ((dataPath && *dataPath) || !isICUData) {
1361             retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
1362                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1363             if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
1364                 return retVal;
1365             }
1366         }
1367     }
1368 
1369     /****    COMMON PACKAGE  */
1370     if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
1371        (gDataFileAccess==UDATA_FILES_FIRST)) {
1372 #ifdef UDATA_DEBUG
1373         fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
1374 #endif
1375         retVal = doLoadFromCommonData(isICUData,
1376                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1377                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1378         if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
1379             return retVal;
1380         }
1381     }
1382 
1383     /* Load from DLL.  If we haven't attempted package load, we also haven't had any chance to
1384         try a DLL (static or setCommonData/etc)  load.
1385          If we ever have a "UDATA_ONLY_FILES", add it to the or list here.  */
1386     if(gDataFileAccess==UDATA_NO_FILES) {
1387 #ifdef UDATA_DEBUG
1388         fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
1389 #endif
1390         retVal = doLoadFromCommonData(isICUData,
1391                             pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
1392                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1393         if((retVal != nullptr) || U_FAILURE(*pErrorCode)) {
1394             return retVal;
1395         }
1396     }
1397 
1398     /* data not found */
1399     if(U_SUCCESS(*pErrorCode)) {
1400         if(U_SUCCESS(subErrorCode)) {
1401             /* file not found */
1402             *pErrorCode=U_FILE_ACCESS_ERROR;
1403         } else {
1404             /* entry point not found or rejected */
1405             *pErrorCode=subErrorCode;
1406         }
1407     }
1408     return retVal;
1409 }
1410 
1411 
1412 
1413 /* API ---------------------------------------------------------------------- */
1414 
1415 U_CAPI UDataMemory * U_EXPORT2
udata_open(const char * path,const char * type,const char * name,UErrorCode * pErrorCode)1416 udata_open(const char *path, const char *type, const char *name,
1417            UErrorCode *pErrorCode) {
1418 #ifdef UDATA_DEBUG
1419   fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"nullptr"), name, type);
1420     fflush(stderr);
1421 #endif
1422 
1423     if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
1424         return nullptr;
1425     } else if(name==nullptr || *name==0) {
1426         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1427         return nullptr;
1428     } else {
1429         return doOpenChoice(path, type, name, nullptr, nullptr, pErrorCode);
1430     }
1431 }
1432 
1433 
1434 
1435 U_CAPI UDataMemory * U_EXPORT2
udata_openChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1436 udata_openChoice(const char *path, const char *type, const char *name,
1437                  UDataMemoryIsAcceptable *isAcceptable, void *context,
1438                  UErrorCode *pErrorCode) {
1439 #ifdef UDATA_DEBUG
1440   fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"nullptr"), name, type);
1441 #endif
1442 
1443     if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
1444         return nullptr;
1445     } else if(name==nullptr || *name==0 || isAcceptable==nullptr) {
1446         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1447         return nullptr;
1448     } else {
1449         return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
1450     }
1451 }
1452 
1453 
1454 
1455 U_CAPI void U_EXPORT2
udata_getInfo(UDataMemory * pData,UDataInfo * pInfo)1456 udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
1457     if(pInfo!=nullptr) {
1458         if(pData!=nullptr && pData->pHeader!=nullptr) {
1459             const UDataInfo *info=&pData->pHeader->info;
1460             uint16_t dataInfoSize=udata_getInfoSize(info);
1461             if(pInfo->size>dataInfoSize) {
1462                 pInfo->size=dataInfoSize;
1463             }
1464             uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
1465             if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
1466                 /* opposite endianness */
1467                 uint16_t x=info->reservedWord;
1468                 pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
1469             }
1470         } else {
1471             pInfo->size=0;
1472         }
1473     }
1474 }
1475 
1476 
udata_setFileAccess(UDataFileAccess access,UErrorCode *)1477 U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
1478 {
1479     // Note: this function is documented as not thread safe.
1480     gDataFileAccess = access;
1481 }
1482