• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1997-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12 *
13 *   Date        Name        Description
14 *   04/14/97    aliu        Creation.
15 *   04/24/97    aliu        Added getDefaultDataDirectory() and
16 *                            getDefaultLocaleID().
17 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
18 *                            for assumed case.  Non-UNIX platforms must be
19 *                            special-cased.  Rewrote numeric methods dealing
20 *                            with NaN and Infinity to be platform independent
21 *                             over all IEEE 754 platforms.
22 *   05/13/97    aliu        Restored sign of timezone
23 *                            (semantics are hours West of GMT)
24 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25 *                             nextDouble..
26 *   07/22/98    stephen     Added remainder, max, min, trunc
27 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
28 *   08/24/98    stephen     Added longBitsFromDouble
29 *   09/08/98    stephen     Minor changes for Mac Port
30 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
31 *                            Fixed EBCDIC tables
32 *   04/15/99    stephen     Converted to C.
33 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
34 *   08/04/99    jeffrey R.  Added OS/2 changes
35 *   11/15/99    helena      Integrated S/390 IEEE support.
36 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
37 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
38 *   01/03/08    Steven L.   Fake Time Support
39 ******************************************************************************
40 */
41 
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
45 
46 // First, the platform type. Need this for U_PLATFORM.
47 #include "unicode/platform.h"
48 
49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50 /* tzset isn't defined in strict ANSI on MinGW. */
51 #undef __STRICT_ANSI__
52 #endif
53 
54 /*
55  * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
56  */
57 #include <time.h>
58 
59 #if !U_PLATFORM_USES_ONLY_WIN32_API
60 #include <sys/time.h>
61 #endif
62 
63 /* include the rest of the ICU headers */
64 #include "unicode/putil.h"
65 #include "unicode/ustring.h"
66 #include "putilimp.h"
67 #include "uassert.h"
68 #include "umutex.h"
69 #include "cmemory.h"
70 #include "cstring.h"
71 #include "locmap.h"
72 #include "ucln_cmn.h"
73 #include "charstr.h"
74 
75 /* Include standard headers. */
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <math.h>
80 #include <locale.h>
81 #include <float.h>
82 
83 #ifndef U_COMMON_IMPLEMENTATION
84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
85 #endif
86 
87 
88 /* include system headers */
89 #if U_PLATFORM_USES_ONLY_WIN32_API
90     /*
91      * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92      * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93      * to use native APIs as much as possible?
94      */
95 #ifndef WIN32_LEAN_AND_MEAN
96 #   define WIN32_LEAN_AND_MEAN
97 #endif
98 #   define VC_EXTRALEAN
99 #   define NOUSER
100 #   define NOSERVICE
101 #   define NOIME
102 #   define NOMCX
103 #   include <windows.h>
104 #   include "unicode/uloc.h"
105 #   include "wintz.h"
106 #elif U_PLATFORM == U_PF_OS400
107 #   include <float.h>
108 #   include <qusec.h>       /* error code structure */
109 #   include <qusrjobi.h>
110 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
111 #   include <mih/testptr.h> /* For uprv_maximumPtr */
112 #elif U_PLATFORM == U_PF_OS390
113 #   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
114 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
115 #   include <limits.h>
116 #   include <unistd.h>
117 #   if U_PLATFORM == U_PF_SOLARIS
118 #       ifndef _XPG4_2
119 #           define _XPG4_2
120 #       endif
121 #   elif U_PLATFORM == U_PF_ANDROID
122 #       include <sys/system_properties.h>
123 #       include <dlfcn.h>
124 #   endif
125 #elif U_PLATFORM == U_PF_QNX
126 #   include <sys/neutrino.h>
127 #endif
128 
129 
130 /*
131  * Only include langinfo.h if we have a way to get the codeset. If we later
132  * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
133  *
134  */
135 
136 #if U_HAVE_NL_LANGINFO_CODESET
137 #include <langinfo.h>
138 #endif
139 
140 /**
141  * Simple things (presence of functions, etc) should just go in configure.in and be added to
142  * icucfg.h via autoheader.
143  */
144 #if U_PLATFORM_IMPLEMENTS_POSIX
145 #   if U_PLATFORM == U_PF_OS400
146 #    define HAVE_DLFCN_H 0
147 #    define HAVE_DLOPEN 0
148 #   else
149 #   ifndef HAVE_DLFCN_H
150 #    define HAVE_DLFCN_H 1
151 #   endif
152 #   ifndef HAVE_DLOPEN
153 #    define HAVE_DLOPEN 1
154 #   endif
155 #   endif
156 #   ifndef HAVE_GETTIMEOFDAY
157 #    define HAVE_GETTIMEOFDAY 1
158 #   endif
159 #else
160 #   define HAVE_DLFCN_H 0
161 #   define HAVE_DLOPEN 0
162 #   define HAVE_GETTIMEOFDAY 0
163 #endif
164 
165 U_NAMESPACE_USE
166 
167 /* Define the extension for data files, again... */
168 #define DATA_TYPE "dat"
169 
170 /* Leave this copyright notice here! */
171 static const char copyright[] = U_COPYRIGHT_STRING;
172 
173 /* floating point implementations ------------------------------------------- */
174 
175 /* We return QNAN rather than SNAN*/
176 #define SIGN 0x80000000U
177 
178 /* Make it easy to define certain types of constants */
/*
 * Overlays a double with its raw 64-bit pattern so that floating-point
 * constants can be spelled as bit patterns and the bits of a double inspected.
 * The integer member must remain the first member: brace initialization of a
 * union initializes its first member, which gNan and gInf depend on.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern 0x7FF8000000000000; this is the value uprv_getNaN() hands out. */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };
/* Bit pattern 0x7FF0000000000000; this is the value uprv_getInfinity() hands out. */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
185 
186 /*---------------------------------------------------------------------------
187   Platform utilities
188   Our general strategy is to assume we're on a POSIX platform.  Platforms which
189   are non-POSIX must declare themselves so.  The default POSIX implementation
190   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
191   functions).
192   ---------------------------------------------------------------------------*/
193 
194 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
195 #   undef U_POSIX_LOCALE
196 #else
197 #   define U_POSIX_LOCALE    1
198 #endif
199 
200 /*
201     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
202     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
203 */
204 #if !IEEE_754
/*
 * Returns a pointer to the top n bytes of *d.
 * When U_IS_BIG_ENDIAN is set that is the start of the double (n is not
 * consulted); otherwise it is the address n bytes before the end of the double.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *top = (char*)d;
#if !U_IS_BIG_ENDIAN
    top = (char*)(d + 1) - n;
#endif
    return top;
}
214 
/*
 * Returns a pointer to the bottom n bytes of *d.
 * When U_IS_BIG_ENDIAN is set that is the address n bytes before the end of
 * the double; otherwise it is the start of the double (n is not consulted).
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *bottom = (char*)d;
#if U_IS_BIG_ENDIAN
    bottom = (char*)(d + 1) - n;
#endif
    return bottom;
}
224 #endif   /* !IEEE_754 */
225 
226 #if IEEE_754
227 static UBool
u_signBit(double d)228 u_signBit(double d) {
229     uint8_t hiByte;
230 #if U_IS_BIG_ENDIAN
231     hiByte = *(uint8_t *)&d;
232 #else
233     hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
234 #endif
235     return (hiByte & 0x80) != 0;
236 }
237 #endif
238 
239 
240 
241 #if defined (U_DEBUG_FAKETIME)
242 /* Override the clock to test things without having to move the system clock.
243  * Assumes POSIX gettimeofday() will function
244  */
245 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
246 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
247 UBool fakeClock_set = false; /** True if fake clock has spun up **/
248 
getUTCtime_real()249 static UDate getUTCtime_real() {
250     struct timeval posixTime;
251     gettimeofday(&posixTime, nullptr);
252     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
253 }
254 
getUTCtime_fake()255 static UDate getUTCtime_fake() {
256     static UMutex fakeClockMutex;
257     umtx_lock(&fakeClockMutex);
258     if(!fakeClock_set) {
259         UDate real = getUTCtime_real();
260         const char *fake_start = getenv("U_FAKETIME_START");
261         if((fake_start!=nullptr) && (fake_start[0]!=0)) {
262             sscanf(fake_start,"%lf",&fakeClock_t0);
263             fakeClock_dt = fakeClock_t0 - real;
264             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
265                     "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
266                     fakeClock_t0, fake_start, fakeClock_dt, real);
267         } else {
268           fakeClock_dt = 0;
269             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
270                     "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
271         }
272         fakeClock_set = true;
273     }
274     umtx_unlock(&fakeClockMutex);
275 
276     return getUTCtime_real() + fakeClock_dt;
277 }
278 #endif
279 
280 #if U_PLATFORM_USES_ONLY_WIN32_API
281 typedef union {
282     int64_t int64;
283     FILETIME fileTime;
284 } FileTimeConversion;   /* This is like a ULARGE_INTEGER */
285 
286 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
287 #define EPOCH_BIAS  INT64_C(116444736000000000)
288 #define HECTONANOSECOND_PER_MILLISECOND   10000
289 
290 #endif
291 
292 /*---------------------------------------------------------------------------
293   Universal Implementations
294   These are designed to work on all platforms.  Try these, and if they
295   don't work on your platform, then special case your platform with new
296   implementations.
297 ---------------------------------------------------------------------------*/
298 
299 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()300 uprv_getUTCtime()
301 {
302 #if defined(U_DEBUG_FAKETIME)
303     return getUTCtime_fake(); /* Hook for overriding the clock */
304 #else
305     return uprv_getRawUTCtime();
306 #endif
307 }
308 
309 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
310 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()311 uprv_getRawUTCtime()
312 {
313 #if U_PLATFORM_USES_ONLY_WIN32_API
314 
315     FileTimeConversion winTime;
316     GetSystemTimeAsFileTime(&winTime.fileTime);
317     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
318 #else
319 
320 #if HAVE_GETTIMEOFDAY
321     struct timeval posixTime;
322     gettimeofday(&posixTime, nullptr);
323     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
324 #else
325     time_t epochtime;
326     time(&epochtime);
327     return (UDate)epochtime * U_MILLIS_PER_SECOND;
328 #endif
329 
330 #endif
331 }
332 
333 /*-----------------------------------------------------------------------------
334   IEEE 754
335   These methods detect and return NaN and infinity values for doubles
336   conforming to IEEE 754.  Platforms which support this standard include X86,
337   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
338   If this doesn't work on your platform, you have non-IEEE floating-point, and
339   will need to code your own versions.  A naive implementation is to return 0.0
340   for getNaN and getInfinity, and false for isNaN and isInfinite.
341   ---------------------------------------------------------------------------*/
342 
343 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)344 uprv_isNaN(double number)
345 {
346 #if IEEE_754
347     BitPatternConversion convertedNumber;
348     convertedNumber.d64 = number;
349     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
350     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
351 
352 #elif U_PLATFORM == U_PF_OS390
353     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
354                         sizeof(uint32_t));
355     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
356                         sizeof(uint32_t));
357 
358     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
359       (lowBits == 0x00000000L);
360 
361 #else
362     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
363     /* you'll need to replace this default implementation with what's correct*/
364     /* for your platform.*/
365     return number != number;
366 #endif
367 }
368 
369 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)370 uprv_isInfinite(double number)
371 {
372 #if IEEE_754
373     BitPatternConversion convertedNumber;
374     convertedNumber.d64 = number;
375     /* Infinity is exactly 0x7FF0000000000000U. */
376     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
377 #elif U_PLATFORM == U_PF_OS390
378     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
379                         sizeof(uint32_t));
380     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
381                         sizeof(uint32_t));
382 
383     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
384 
385 #else
386     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
387     /* value, you'll need to replace this default implementation with what's*/
388     /* correct for your platform.*/
389     return number == (2.0 * number);
390 #endif
391 }
392 
393 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)394 uprv_isPositiveInfinity(double number)
395 {
396 #if IEEE_754 || U_PLATFORM == U_PF_OS390
397     return (UBool)(number > 0 && uprv_isInfinite(number));
398 #else
399     return uprv_isInfinite(number);
400 #endif
401 }
402 
403 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)404 uprv_isNegativeInfinity(double number)
405 {
406 #if IEEE_754 || U_PLATFORM == U_PF_OS390
407     return (UBool)(number < 0 && uprv_isInfinite(number));
408 
409 #else
410     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
411                         sizeof(uint32_t));
412     return((highBits & SIGN) && uprv_isInfinite(number));
413 
414 #endif
415 }
416 
417 U_CAPI double U_EXPORT2
uprv_getNaN()418 uprv_getNaN()
419 {
420 #if IEEE_754 || U_PLATFORM == U_PF_OS390
421     return gNan.d64;
422 #else
423     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
424     /* you'll need to replace this default implementation with what's correct*/
425     /* for your platform.*/
426     return 0.0;
427 #endif
428 }
429 
430 U_CAPI double U_EXPORT2
uprv_getInfinity()431 uprv_getInfinity()
432 {
433 #if IEEE_754 || U_PLATFORM == U_PF_OS390
434     return gInf.d64;
435 #else
436     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
437     /* value, you'll need to replace this default implementation with what's*/
438     /* correct for your platform.*/
439     return 0.0;
440 #endif
441 }
442 
443 U_CAPI double U_EXPORT2
uprv_floor(double x)444 uprv_floor(double x)
445 {
446     return floor(x);
447 }
448 
449 U_CAPI double U_EXPORT2
uprv_ceil(double x)450 uprv_ceil(double x)
451 {
452     return ceil(x);
453 }
454 
455 U_CAPI double U_EXPORT2
uprv_round(double x)456 uprv_round(double x)
457 {
458     return uprv_floor(x + 0.5);
459 }
460 
461 U_CAPI double U_EXPORT2
uprv_fabs(double x)462 uprv_fabs(double x)
463 {
464     return fabs(x);
465 }
466 
467 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)468 uprv_modf(double x, double* y)
469 {
470     return modf(x, y);
471 }
472 
473 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)474 uprv_fmod(double x, double y)
475 {
476     return fmod(x, y);
477 }
478 
479 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)480 uprv_pow(double x, double y)
481 {
482     /* This is declared as "double pow(double x, double y)" */
483     return pow(x, y);
484 }
485 
486 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)487 uprv_pow10(int32_t x)
488 {
489     return pow(10.0, (double)x);
490 }
491 
492 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)493 uprv_fmax(double x, double y)
494 {
495 #if IEEE_754
496     /* first handle NaN*/
497     if(uprv_isNaN(x) || uprv_isNaN(y))
498         return uprv_getNaN();
499 
500     /* check for -0 and 0*/
501     if(x == 0.0 && y == 0.0 && u_signBit(x))
502         return y;
503 
504 #endif
505 
506     /* this should work for all flt point w/o NaN and Inf special cases */
507     return (x > y ? x : y);
508 }
509 
510 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)511 uprv_fmin(double x, double y)
512 {
513 #if IEEE_754
514     /* first handle NaN*/
515     if(uprv_isNaN(x) || uprv_isNaN(y))
516         return uprv_getNaN();
517 
518     /* check for -0 and 0*/
519     if(x == 0.0 && y == 0.0 && u_signBit(y))
520         return y;
521 
522 #endif
523 
524     /* this should work for all flt point w/o NaN and Inf special cases */
525     return (x > y ? y : x);
526 }
527 
528 U_CAPI UBool U_EXPORT2
uprv_add32_overflow(int32_t a,int32_t b,int32_t * res)529 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
530     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
531     // This function could be optimized by calling one of those primitives.
532     auto a64 = static_cast<int64_t>(a);
533     auto b64 = static_cast<int64_t>(b);
534     int64_t res64 = a64 + b64;
535     *res = static_cast<int32_t>(res64);
536     return res64 != *res;
537 }
538 
539 U_CAPI UBool U_EXPORT2
uprv_mul32_overflow(int32_t a,int32_t b,int32_t * res)540 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
541     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
542     // This function could be optimized by calling one of those primitives.
543     auto a64 = static_cast<int64_t>(a);
544     auto b64 = static_cast<int64_t>(b);
545     int64_t res64 = a64 * b64;
546     *res = static_cast<int32_t>(res64);
547     return res64 != *res;
548 }
549 
550 /**
551  * Truncates the given double.
552  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
553  * This is different than calling floor() or ceil():
554  * floor(3.3) = 3, floor(-3.3) = -4
555  * ceil(3.3) = 4, ceil(-3.3) = -3
556  */
557 U_CAPI double U_EXPORT2
uprv_trunc(double d)558 uprv_trunc(double d)
559 {
560 #if IEEE_754
561     /* handle error cases*/
562     if(uprv_isNaN(d))
563         return uprv_getNaN();
564     if(uprv_isInfinite(d))
565         return uprv_getInfinity();
566 
567     if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
568         return ceil(d);
569     else
570         return floor(d);
571 
572 #else
573     return d >= 0 ? floor(d) : ceil(d);
574 
575 #endif
576 }
577 
578 /**
579  * Return the largest positive number that can be represented by an integer
580  * type of arbitrary bit length.
581  */
582 U_CAPI double U_EXPORT2
uprv_maxMantissa()583 uprv_maxMantissa()
584 {
585     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
586 }
587 
588 U_CAPI double U_EXPORT2
uprv_log(double d)589 uprv_log(double d)
590 {
591     return log(d);
592 }
593 
594 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)595 uprv_maximumPtr(void * base)
596 {
597 #if U_PLATFORM == U_PF_OS400
598     /*
599      * With the provided function we should never be out of range of a given segment
600      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
601      * id and 3 bytes for the offset.  The key is that the casting takes care of
602      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
603      * seen in a program is x001000 and when casted to an int would be 0.
604      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
605      *
606      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
607      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
608      * This function determines the activation based on the pointer that is passed in and
609      * calculates the appropriate maximum available size for
610      * each pointer type (TERASPACE and non-TERASPACE)
611      *
612      * Unlike other operating systems, the pointer model isn't determined at
613      * compile time on i5/OS.
614      */
615     if ((base != nullptr) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
616         /* if it is a TERASPACE pointer the max is 2GB - 4k */
617         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
618     }
619     /* otherwise 16MB since nullptr ptr is not checkable or the ptr is not TERASPACE */
620     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
621 
622 #else
623     return U_MAX_PTR(base);
624 #endif
625 }
626 
627 /*---------------------------------------------------------------------------
628   Platform-specific Implementations
629   Try these, and if they don't work on your platform, then special case your
630   platform with new implementations.
631   ---------------------------------------------------------------------------*/
632 
633 /* Generic time zone layer -------------------------------------------------- */
634 
635 /* Time zone utilities */
636 U_CAPI void U_EXPORT2
uprv_tzset()637 uprv_tzset()
638 {
639 #if defined(U_TZSET)
640     U_TZSET();
641 #else
642     /* no initialization*/
643 #endif
644 }
645 
646 U_CAPI int32_t U_EXPORT2
uprv_timezone()647 uprv_timezone()
648 {
649 #ifdef U_TIMEZONE
650     return U_TIMEZONE;
651 #else
652     time_t t, t1, t2;
653     struct tm tmrec;
654     int32_t tdiff = 0;
655 
656     time(&t);
657     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
658 #if U_PLATFORM != U_PF_IPHONE
659     UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
660 #endif
661     t1 = mktime(&tmrec);                 /* local time in seconds*/
662     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
663     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
664     tdiff = t2 - t1;
665 
666 #if U_PLATFORM != U_PF_IPHONE
667     /* imitate NT behaviour, which returns same timezone offset to GMT for
668        winter and summer.
669        This does not work on all platforms. For instance, on glibc on Linux
670        and on Mac OS 10.5, tdiff calculated above remains the same
671        regardless of whether DST is in effect or not. iOS is another
672        platform where this does not work. Linux + glibc and Mac OS 10.5
673        have U_TIMEZONE defined so that this code is not reached.
674     */
675     if (dst_checked)
676         tdiff += 3600;
677 #endif
678     return tdiff;
679 #endif
680 }
681 
682 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
683    some platforms need to have it declared here. */
684 
685 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
686 /* RS6000 and others reject char **tzname.  */
687 extern U_IMPORT char *U_TZNAME[];
688 #endif
689 
690 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
691 /* These platforms are likely to use Olson timezone IDs. */
692 /* common targets of the symbolic link at TZDEFAULT are:
693  * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
694  * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
695  * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
696  * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
697  * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
698  * To avoid checking lots of paths, just check that the target path
699  * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
700  */
701 
702 #define CHECK_LOCALTIME_LINK 1
703 #if U_PLATFORM_IS_DARWIN_BASED
704 #include <tzfile.h>
705 #define TZZONEINFO      (TZDIR "/")
706 #elif U_PLATFORM == U_PF_SOLARIS
707 #define TZDEFAULT       "/etc/localtime"
708 #define TZZONEINFO      "/usr/share/lib/zoneinfo/"
709 #define TZ_ENV_CHECK    "localtime"
710 #else
711 #define TZDEFAULT       "/etc/localtime"
712 #define TZZONEINFO      "/usr/share/zoneinfo/"
713 #endif
714 #define TZZONEINFOTAIL  "/zoneinfo/"
715 #if U_HAVE_DIRENT_H
716 #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
717 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
718    symlinked to /etc/localtime, which makes searchForTZFile return
719    'localtime' when it's the first match. */
720 #define TZFILE_SKIP2    "localtime"
721 #define SEARCH_TZFILE
722 #include <dirent.h>  /* Needed to search through system timezone files */
723 #endif
724 static char gTimeZoneBuffer[PATH_MAX];
725 static const char *gTimeZoneBufferPtr = nullptr;
726 #endif
727 
728 #if !U_PLATFORM_USES_ONLY_WIN32_API
729 #define isNonDigit(ch) (ch < '0' || '9' < ch)
730 #define isDigit(ch) ('0' <= ch && ch <= '9')
isValidOlsonID(const char * id)731 static UBool isValidOlsonID(const char *id) {
732     int32_t idx = 0;
733     int32_t idxMax = 0;
734 
735     /* Determine if this is something like Iceland (Olson ID)
736     or AST4ADT (non-Olson ID) */
737     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
738         idx++;
739     }
740 
741     /* Allow at maximum 2 numbers at the end of the id to support zone id's
742     like GMT+11. */
743     idxMax = idx + 2;
744     while (id[idx] && isDigit(id[idx]) && idx < idxMax) {
745         idx++;
746     }
747 
748     /* If we went through the whole string, then it might be okay.
749     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
750     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
751     The rest of the time it could be an Olson ID. George */
752     return (UBool)(id[idx] == 0
753         || uprv_strcmp(id, "PST8PDT") == 0
754         || uprv_strcmp(id, "MST7MDT") == 0
755         || uprv_strcmp(id, "CST6CDT") == 0
756         || uprv_strcmp(id, "EST5EDT") == 0);
757 }
758 
759 /* On some Unix-like OS, 'posix' subdirectory in
760    /usr/share/zoneinfo replicates the top-level contents. 'right'
761    subdirectory has the same set of files, but individual files
762    are different from those in the top-level directory or 'posix'
763    because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
764    has files for UTC.
765    When the first match for /etc/localtime is in either of them
766    (usually in posix because 'right' has different file contents),
767    or TZ environment variable points to one of them, createTimeZone
768    fails because, say, 'posix/America/New_York' is not an Olson
769    timezone id ('America/New_York' is). So, we have to skip
770    'posix/' and 'right/' at the beginning. */
skipZoneIDPrefix(const char ** id)771 static void skipZoneIDPrefix(const char** id) {
772     if (uprv_strncmp(*id, "posix/", 6) == 0
773         || uprv_strncmp(*id, "right/", 6) == 0)
774     {
775         *id += 6;
776     }
777 }
778 #endif
779 
780 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
781 
782 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/*
 * One row of the table that maps a (UTC offset, daylight pattern, standard
 * abbreviation, daylight abbreviation) combination to an Olson ID.
 */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;   /* offset in seconds, compared against the offset passed to remapShortTimeZone() */
    int32_t daylightType;    /* one of the U_DAYLIGHT_* values below */
    const char *stdID;       /* abbreviation used for standard time */
    const char *dstID;       /* abbreviation used for daylight time */
    const char *olsonID;     /* Olson ID the combination resolves to */
} OffsetZoneMapping;

/* Values stored in OffsetZoneMapping::daylightType. */
enum {
    U_DAYLIGHT_NONE = 0,     /* no daylight saving time */
    U_DAYLIGHT_JUNE = 1,     /* daylight saving time in June */
    U_DAYLIGHT_DECEMBER = 2  /* daylight saving time in December */
};
792 
793 /*
794 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
795 and maps it to an Olson ID.
796 Before adding anything to this list, take a look at
797 icu/source/tools/tzcode/tz.alias
798 Sometimes no daylight savings (0) is important to define due to aliases.
799 This list can be tested with icu/source/test/compat/tzone.pl
800 More values could be added to daylightType to increase precision.
801 */
802 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
803     {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
804     {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
805     {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
806     {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
807     {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
808     {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
809     {-36000, 2, "EST", "EST", "Australia/Sydney"},
810     {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
811     {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
812     {-34200, 2, "CST", "CST", "Australia/South"},
813     {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
814     {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
815     {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
816     {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
817     {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
818     {-28800, 2, "WST", "WST", "Australia/West"},
819     {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
820     {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
821     {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
822     {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
823     {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
824     {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
825     {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
826     {-14400, 1, "AZT", "AZST", "Asia/Baku"},
827     {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
828     {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
829     {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
830     {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
831     {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
832     {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
833     {-3600, 0, "CET", "WEST", "Africa/Algiers"},
834     {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
835     {0, 1, "GMT", "IST", "Europe/Dublin"},
836     {0, 1, "GMT", "BST", "Europe/London"},
837     {0, 0, "WET", "WEST", "Africa/Casablanca"},
838     {0, 0, "WET", "WET", "Africa/El_Aaiun"},
839     {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
840     {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
841     {10800, 1, "PMST", "PMDT", "America/Miquelon"},
842     {10800, 2, "UYT", "UYST", "America/Montevideo"},
843     {10800, 1, "WGT", "WGST", "America/Godthab"},
844     {10800, 2, "BRT", "BRST", "Brazil/East"},
845     {12600, 1, "NST", "NDT", "America/St_Johns"},
846     {14400, 1, "AST", "ADT", "Canada/Atlantic"},
847     {14400, 2, "AMT", "AMST", "America/Cuiaba"},
848     {14400, 2, "CLT", "CLST", "Chile/Continental"},
849     {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
850     {14400, 2, "PYT", "PYST", "America/Asuncion"},
851     {18000, 1, "CST", "CDT", "America/Havana"},
852     {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
853     {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
854     {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
855     {21600, 0, "CST", "CDT", "America/Guatemala"},
856     {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
857     {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
858     {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
859     {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
860     {32400, 1, "AKST", "AKDT", "US/Alaska"},
861     {36000, 1, "HAST", "HADT", "US/Aleutian"}
862 };
863 
864 /*#define DEBUG_TZNAME*/
865 
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)866 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
867 {
868     int32_t idx;
869 #ifdef DEBUG_TZNAME
870     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
871 #endif
872     for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
873     {
874         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
875             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
876             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
877             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
878         {
879             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
880         }
881     }
882     return nullptr;
883 }
884 #endif
885 
886 #ifdef SEARCH_TZFILE
887 #define MAX_READ_SIZE 512
888 
889 typedef struct DefaultTZInfo {
890     char* defaultTZBuffer;
891     int64_t defaultTZFileSize;
892     FILE* defaultTZFilePtr;
893     UBool defaultTZstatus;
894     int32_t defaultTZPosition;
895 } DefaultTZInfo;
896 
897 /*
898  * This method compares the two files given to see if they are a match.
899  * It is currently use to compare two TZ files.
900  */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)901 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
902     FILE* file;
903     int64_t sizeFile;
904     int64_t sizeFileLeft;
905     int32_t sizeFileRead;
906     int32_t sizeFileToRead;
907     char bufferFile[MAX_READ_SIZE];
908     UBool result = true;
909 
910     if (tzInfo->defaultTZFilePtr == nullptr) {
911         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
912     }
913     file = fopen(TZFileName, "r");
914 
915     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
916 
917     if (file != nullptr && tzInfo->defaultTZFilePtr != nullptr) {
918         /* First check that the file size are equal. */
919         if (tzInfo->defaultTZFileSize == 0) {
920             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
921             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
922         }
923         fseek(file, 0, SEEK_END);
924         sizeFile = ftell(file);
925         sizeFileLeft = sizeFile;
926 
927         if (sizeFile != tzInfo->defaultTZFileSize) {
928             result = false;
929         } else {
930             /* Store the data from the files in separate buffers and
931              * compare each byte to determine equality.
932              */
933             if (tzInfo->defaultTZBuffer == nullptr) {
934                 rewind(tzInfo->defaultTZFilePtr);
935                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
936                 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
937             }
938             rewind(file);
939             while(sizeFileLeft > 0) {
940                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
941                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
942 
943                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
944                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
945                     result = false;
946                     break;
947                 }
948                 sizeFileLeft -= sizeFileRead;
949                 tzInfo->defaultTZPosition += sizeFileRead;
950             }
951         }
952     } else {
953         result = false;
954     }
955 
956     if (file != nullptr) {
957         fclose(file);
958     }
959 
960     return result;
961 }
962 
963 
964 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
965 #define SKIP1 "."
966 #define SKIP2 ".."
967 static UBool U_CALLCONV putil_cleanup();
968 static CharString *gSearchTZFileResult = nullptr;
969 
970 /*
971  * This method recursively traverses the directory given for a matching TZ file and returns the first match.
972  * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
973  */
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)974 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
975     DIR* dirp = nullptr;
976     struct dirent* dirEntry = nullptr;
977     char* result = nullptr;
978     UErrorCode status = U_ZERO_ERROR;
979 
980     /* Save the current path */
981     CharString curpath(path, -1, status);
982     if (U_FAILURE(status)) {
983         goto cleanupAndReturn;
984     }
985 
986     dirp = opendir(path);
987     if (dirp == nullptr) {
988         goto cleanupAndReturn;
989     }
990 
991     if (gSearchTZFileResult == nullptr) {
992         gSearchTZFileResult = new CharString;
993         if (gSearchTZFileResult == nullptr) {
994             goto cleanupAndReturn;
995         }
996         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
997     }
998 
999     /* Check each entry in the directory. */
1000     while((dirEntry = readdir(dirp)) != nullptr) {
1001         const char* dirName = dirEntry->d_name;
1002         if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
1003             && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
1004             /* Create a newpath with the new entry to test each entry in the directory. */
1005             CharString newpath(curpath, status);
1006             newpath.append(dirName, -1, status);
1007             if (U_FAILURE(status)) {
1008                 break;
1009             }
1010 
1011             DIR* subDirp = nullptr;
1012             if ((subDirp = opendir(newpath.data())) != nullptr) {
1013                 /* If this new path is a directory, make a recursive call with the newpath. */
1014                 closedir(subDirp);
1015                 newpath.append('/', status);
1016                 if (U_FAILURE(status)) {
1017                     break;
1018                 }
1019                 result = searchForTZFile(newpath.data(), tzInfo);
1020                 /*
1021                  Have to get out here. Otherwise, we'd keep looking
1022                  and return the first match in the top-level directory
1023                  if there's a match in the top-level. If not, this function
1024                  would return nullptr and set gTimeZoneBufferPtr to nullptr in initDefault().
1025                  It worked without this in most cases because we have a fallback of calling
1026                  localtime_r to figure out the default timezone.
1027                 */
1028                 if (result != nullptr)
1029                     break;
1030             } else {
1031                 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1032                     int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1033                     if (amountToSkip > newpath.length()) {
1034                         amountToSkip = newpath.length();
1035                     }
1036                     const char* zoneid = newpath.data() + amountToSkip;
1037                     skipZoneIDPrefix(&zoneid);
1038                     gSearchTZFileResult->clear();
1039                     gSearchTZFileResult->append(zoneid, -1, status);
1040                     if (U_FAILURE(status)) {
1041                         break;
1042                     }
1043                     result = gSearchTZFileResult->data();
1044                     /* Get out after the first one found. */
1045                     break;
1046                 }
1047             }
1048         }
1049     }
1050 
1051   cleanupAndReturn:
1052     if (dirp) {
1053         closedir(dirp);
1054     }
1055     return result;
1056 }
1057 #endif
1058 
1059 #if U_PLATFORM == U_PF_ANDROID
1060 typedef int(system_property_read_callback)(const prop_info* info,
1061                                            void (*callback)(void* cookie,
1062                                                             const char* name,
1063                                                             const char* value,
1064                                                             uint32_t serial),
1065                                            void* cookie);
1066 typedef int(system_property_get)(const char*, char*);
1067 
1068 static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' };
1069 
u_property_read(void * cookie,const char * name,const char * value,uint32_t serial)1070 static void u_property_read(void* cookie, const char* name, const char* value,
1071                             uint32_t serial) {
1072     uprv_strcpy((char* )cookie, value);
1073 }
1074 #endif
1075 
1076 U_CAPI void U_EXPORT2
uprv_tzname_clear_cache()1077 uprv_tzname_clear_cache()
1078 {
1079 #if U_PLATFORM == U_PF_ANDROID
1080     /* Android's timezone is stored in system property. */
1081     gAndroidTimeZone[0] = '\0';
1082     void* libc = dlopen("libc.so", RTLD_NOLOAD);
1083     if (libc) {
1084         /* Android API 26+ has new API to get system property and old API
1085          * (__system_property_get) is deprecated */
1086         system_property_read_callback* property_read_callback =
1087             (system_property_read_callback*)dlsym(
1088                 libc, "__system_property_read_callback");
1089         if (property_read_callback) {
1090             const prop_info* info =
1091                 __system_property_find("persist.sys.timezone");
1092             if (info) {
1093                 property_read_callback(info, &u_property_read, gAndroidTimeZone);
1094             }
1095         } else {
1096             system_property_get* property_get =
1097                 (system_property_get*)dlsym(libc, "__system_property_get");
1098             if (property_get) {
1099                 property_get("persist.sys.timezone", gAndroidTimeZone);
1100             }
1101         }
1102         dlclose(libc);
1103     }
1104 #endif
1105 
1106 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1107     gTimeZoneBufferPtr = nullptr;
1108 #endif
1109 }
1110 
1111 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)1112 uprv_tzname(int n)
1113 {
1114     (void)n; // Avoid unreferenced parameter warning.
1115     const char *tzid = nullptr;
1116 #if U_PLATFORM_USES_ONLY_WIN32_API
1117     tzid = uprv_detectWindowsTimeZone();
1118 
1119     if (tzid != nullptr) {
1120         return tzid;
1121     }
1122 
1123 #ifndef U_TZNAME
1124     // The return value is free'd in timezone.cpp on Windows because
1125     // the other code path returns a pointer to a heap location.
1126     // If we don't have a name already, then tzname wouldn't be any
1127     // better, so just fall back.
1128     return uprv_strdup("");
1129 #endif // !U_TZNAME
1130 
1131 #else
1132 
1133 /*#if U_PLATFORM_IS_DARWIN_BASED
1134     int ret;
1135 
1136     tzid = getenv("TZFILE");
1137     if (tzid != nullptr) {
1138         return tzid;
1139     }
1140 #endif*/
1141 
1142 /* This code can be temporarily disabled to test tzname resolution later on. */
1143 #ifndef DEBUG_TZNAME
1144 #if U_PLATFORM == U_PF_ANDROID
1145     tzid = gAndroidTimeZone;
1146 #else
1147     tzid = getenv("TZ");
1148 #endif
1149     if (tzid != nullptr && isValidOlsonID(tzid)
1150 #if U_PLATFORM == U_PF_SOLARIS
1151     /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
1152         && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1153 #endif
1154     ) {
1155         /* The colon forces tzset() to treat the remainder as zoneinfo path */
1156         if (tzid[0] == ':') {
1157             tzid++;
1158         }
1159         /* This might be a good Olson ID. */
1160         skipZoneIDPrefix(&tzid);
1161         return tzid;
1162     }
1163     /* else U_TZNAME will give a better result. */
1164 #endif
1165 
1166 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1167     /* Caller must handle threading issues */
1168     if (gTimeZoneBufferPtr == nullptr) {
1169         /*
1170         This is a trick to look at the name of the link to get the Olson ID
1171         because the tzfile contents is underspecified.
1172         This isn't guaranteed to work because it may not be a symlink.
1173         */
1174         char *ret = realpath(TZDEFAULT, gTimeZoneBuffer);
1175         if (ret != nullptr && uprv_strcmp(TZDEFAULT, gTimeZoneBuffer) != 0) {
1176             int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
1177             const char *tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
1178             // MacOS14 has the realpath as something like
1179             // /usr/share/zoneinfo.default/Australia/Melbourne
1180             // which will not have "/zoneinfo/" in the path.
1181             // Therefore if we fail, we fall back to read the link which is
1182             // /var/db/timezone/zoneinfo/Australia/Melbourne
1183             // We also fall back to reading the link if the realpath leads to something like
1184             // /usr/share/zoneinfo/posixrules
1185             if (tzZoneInfoTailPtr == nullptr ||
1186                     uprv_strcmp(tzZoneInfoTailPtr + tzZoneInfoTailLen, "posixrules") == 0) {
1187                 ssize_t size = readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
1188                 if (size > 0) {
1189                     gTimeZoneBuffer[size] = 0;
1190                     tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
1191                 }
1192             }
1193             if (tzZoneInfoTailPtr != nullptr) {
1194                 tzZoneInfoTailPtr += tzZoneInfoTailLen;
1195                 skipZoneIDPrefix(&tzZoneInfoTailPtr);
1196                 if (isValidOlsonID(tzZoneInfoTailPtr)) {
1197                     return (gTimeZoneBufferPtr = tzZoneInfoTailPtr);
1198                 }
1199             }
1200         } else {
1201 #if defined(SEARCH_TZFILE)
1202             DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1203             if (tzInfo != nullptr) {
1204                 tzInfo->defaultTZBuffer = nullptr;
1205                 tzInfo->defaultTZFileSize = 0;
1206                 tzInfo->defaultTZFilePtr = nullptr;
1207                 tzInfo->defaultTZstatus = false;
1208                 tzInfo->defaultTZPosition = 0;
1209 
1210                 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1211 
1212                 /* Free previously allocated memory */
1213                 if (tzInfo->defaultTZBuffer != nullptr) {
1214                     uprv_free(tzInfo->defaultTZBuffer);
1215                 }
1216                 if (tzInfo->defaultTZFilePtr != nullptr) {
1217                     fclose(tzInfo->defaultTZFilePtr);
1218                 }
1219                 uprv_free(tzInfo);
1220             }
1221 
1222             if (gTimeZoneBufferPtr != nullptr && isValidOlsonID(gTimeZoneBufferPtr)) {
1223                 return gTimeZoneBufferPtr;
1224             }
1225 #endif
1226         }
1227     }
1228     else {
1229         return gTimeZoneBufferPtr;
1230     }
1231 #endif
1232 #endif
1233 
1234 #ifdef U_TZNAME
1235 #if U_PLATFORM_USES_ONLY_WIN32_API
1236     /* The return value is free'd in timezone.cpp on Windows because
1237      * the other code path returns a pointer to a heap location. */
1238     return uprv_strdup(U_TZNAME[n]);
1239 #else
1240     /*
1241     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1242     So we remap the abbreviation to an olson ID.
1243 
1244     Since Windows exposes a little more timezone information,
1245     we normally don't use this code on Windows because
1246     uprv_detectWindowsTimeZone should have already given the correct answer.
1247     */
1248     {
1249         struct tm juneSol, decemberSol;
1250         int daylightType;
1251         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1252         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1253 
1254         /* This probing will tell us when daylight savings occurs.  */
1255         localtime_r(&juneSolstice, &juneSol);
1256         localtime_r(&decemberSolstice, &decemberSol);
1257         if(decemberSol.tm_isdst > 0) {
1258           daylightType = U_DAYLIGHT_DECEMBER;
1259         } else if(juneSol.tm_isdst > 0) {
1260           daylightType = U_DAYLIGHT_JUNE;
1261         } else {
1262           daylightType = U_DAYLIGHT_NONE;
1263         }
1264         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1265         if (tzid != nullptr) {
1266             return tzid;
1267         }
1268     }
1269     return U_TZNAME[n];
1270 #endif
1271 #else
1272     return "";
1273 #endif
1274 }
1275 
1276 /* Get and set the ICU data directory --------------------------------------- */
1277 
1278 static icu::UInitOnce gDataDirInitOnce {};
1279 static char *gDataDirectory = nullptr;
1280 
1281 UInitOnce gTimeZoneFilesInitOnce {};
1282 static CharString *gTimeZoneFilesDirectory = nullptr;
1283 
1284 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1285  static const char *gCorrectedPOSIXLocale = nullptr; /* Sometimes heap allocated */
1286  static bool gCorrectedPOSIXLocaleHeapAllocated = false;
1287 #endif
1288 
putil_cleanup()1289 static UBool U_CALLCONV putil_cleanup()
1290 {
1291     if (gDataDirectory && *gDataDirectory) {
1292         uprv_free(gDataDirectory);
1293     }
1294     gDataDirectory = nullptr;
1295     gDataDirInitOnce.reset();
1296 
1297     delete gTimeZoneFilesDirectory;
1298     gTimeZoneFilesDirectory = nullptr;
1299     gTimeZoneFilesInitOnce.reset();
1300 
1301 #ifdef SEARCH_TZFILE
1302     delete gSearchTZFileResult;
1303     gSearchTZFileResult = nullptr;
1304 #endif
1305 
1306 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1307     if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
1308         uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
1309         gCorrectedPOSIXLocale = nullptr;
1310         gCorrectedPOSIXLocaleHeapAllocated = false;
1311     }
1312 #endif
1313     return true;
1314 }
1315 
1316 /*
1317  * Set the data directory.
1318  *    Make a copy of the passed string, and set the global data dir to point to it.
1319  */
1320 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1321 u_setDataDirectory(const char *directory) {
1322     char *newDataDir;
1323     int32_t length;
1324 
1325     if(directory==nullptr || *directory==0) {
1326         /* A small optimization to prevent the malloc and copy when the
1327         shared library is used, and this is a way to make sure that nullptr
1328         is never returned.
1329         */
1330         newDataDir = (char *)"";
1331     }
1332     else {
1333         length=(int32_t)uprv_strlen(directory);
1334         newDataDir = (char *)uprv_malloc(length + 2);
1335         /* Exit out if newDataDir could not be created. */
1336         if (newDataDir == nullptr) {
1337             return;
1338         }
1339         uprv_strcpy(newDataDir, directory);
1340 
1341 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1342         {
1343             char *p;
1344             while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != nullptr) {
1345                 *p = U_FILE_SEP_CHAR;
1346             }
1347         }
1348 #endif
1349     }
1350 
1351     if (gDataDirectory && *gDataDirectory) {
1352         uprv_free(gDataDirectory);
1353     }
1354     gDataDirectory = newDataDir;
1355     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1356 }
1357 
1358 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1359 uprv_pathIsAbsolute(const char *path)
1360 {
1361   if(!path || !*path) {
1362     return false;
1363   }
1364 
1365   if(*path == U_FILE_SEP_CHAR) {
1366     return true;
1367   }
1368 
1369 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1370   if(*path == U_FILE_ALT_SEP_CHAR) {
1371     return true;
1372   }
1373 #endif
1374 
1375 #if U_PLATFORM_USES_ONLY_WIN32_API
1376   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1377        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1378       path[1] == ':' ) {
1379     return true;
1380   }
1381 #endif
1382 
1383   return false;
1384 }
1385 
1386 /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1387    (needed for some Darwin ICU build environments) */
1388 #if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
1389 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1390 #  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1391 # endif
1392 #endif
1393 
1394 #if defined(ICU_DATA_DIR_WINDOWS)
1395 // Helper function to get the ICU Data Directory under the Windows directory location.
getIcuDataDirectoryUnderWindowsDirectory(char * directoryBuffer,UINT bufferLength)1396 static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1397 {
1398     wchar_t windowsPath[MAX_PATH];
1399     char windowsPathUtf8[MAX_PATH];
1400 
1401     UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1402     if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1403         // Convert UTF-16 to a UTF-8 string.
1404         UErrorCode status = U_ZERO_ERROR;
1405         int32_t windowsPathUtf8Len = 0;
1406         u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1407             &windowsPathUtf8Len, reinterpret_cast<const char16_t*>(windowsPath), -1, &status);
1408 
1409         if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1410             (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1411             // Ensure it always has a separator, so we can append the ICU data path.
1412             if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1413                 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1414                 windowsPathUtf8[windowsPathUtf8Len] = '\0';
1415             }
1416             // Check if the concatenated string will fit.
1417             if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1418                 uprv_strcpy(directoryBuffer, windowsPathUtf8);
1419                 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1420                 return true;
1421             }
1422         }
1423     }
1424 
1425     return false;
1426 }
1427 #endif
1428 
dataDirectoryInitFn()1429 static void U_CALLCONV dataDirectoryInitFn() {
1430     /* If we already have the directory, then return immediately. Will happen if user called
1431      * u_setDataDirectory().
1432      */
1433     if (gDataDirectory) {
1434         return;
1435     }
1436 
1437     const char *path = nullptr;
1438 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1439     char datadir_path_buffer[PATH_MAX];
1440 #endif
1441 
1442     /*
1443     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1444     override ICU's data with the ICU_DATA environment variable. This prevents
1445     problems where multiple custom copies of ICU's specific version of data
1446     are installed on a system. Either the application must define the data
1447     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1448     ICU, set the data with udata_setCommonData or trust that all of the
1449     required data is contained in ICU's data library that contains
1450     the entry point defined by U_ICUDATA_ENTRY_POINT.
1451 
1452     There may also be some platforms where environment variables
1453     are not allowed.
1454     */
1455 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1456     /* First try to get the environment variable */
1457 #     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
1458         path=getenv("ICU_DATA");
1459 #     endif
1460 #   endif
1461 
1462     /* ICU_DATA_DIR may be set as a compile option.
1463      * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1464      * and is used only when data is built in archive mode eliminating the need
1465      * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1466      * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1467      * set their own path.
1468      */
1469 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1470     if(path==nullptr || *path==0) {
1471 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1472         const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1473 # endif
1474 # ifdef ICU_DATA_DIR
1475         path=ICU_DATA_DIR;
1476 # else
1477         path=U_ICU_DATA_DEFAULT_DIR;
1478 # endif
1479 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1480         if (prefix != nullptr) {
1481             snprintf(datadir_path_buffer, sizeof(datadir_path_buffer), "%s%s", prefix, path);
1482             path=datadir_path_buffer;
1483         }
1484 # endif
1485     }
1486 #endif
1487 
1488 #if defined(ICU_DATA_DIR_WINDOWS)
1489     char datadir_path_buffer[MAX_PATH];
1490     if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1491         path = datadir_path_buffer;
1492     }
1493 #endif
1494 
1495     if(path==nullptr) {
1496         /* It looks really bad, set it to something. */
1497         path = "";
1498     }
1499 
1500     u_setDataDirectory(path);
1501     return;
1502 }
1503 
1504 U_CAPI const char * U_EXPORT2
u_getDataDirectory()1505 u_getDataDirectory() {
1506     umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1507     return gDataDirectory;
1508 }
1509 
setTimeZoneFilesDir(const char * path,UErrorCode & status)1510 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1511     if (U_FAILURE(status)) {
1512         return;
1513     }
1514     gTimeZoneFilesDirectory->clear();
1515     gTimeZoneFilesDirectory->append(path, status);
1516 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1517     char *p = gTimeZoneFilesDirectory->data();
1518     while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != nullptr) {
1519         *p = U_FILE_SEP_CHAR;
1520     }
1521 #endif
1522 }
1523 
1524 #define TO_STRING(x) TO_STRING_2(x)
1525 #define TO_STRING_2(x) #x
1526 
TimeZoneDataDirInitFn(UErrorCode & status)1527 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1528     U_ASSERT(gTimeZoneFilesDirectory == nullptr);
1529     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1530     gTimeZoneFilesDirectory = new CharString();
1531     if (gTimeZoneFilesDirectory == nullptr) {
1532         status = U_MEMORY_ALLOCATION_ERROR;
1533         return;
1534     }
1535 
1536     const char *dir = "";
1537 
1538 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1539     char timezonefilesdir_path_buffer[PATH_MAX];
1540     const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
1541 #endif
1542 
1543 #if U_PLATFORM_HAS_WINUWP_API == 1
1544 // The UWP version does not support the environment variable setting.
1545 
1546 # if defined(ICU_DATA_DIR_WINDOWS)
1547     // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
1548     char datadir_path_buffer[MAX_PATH];
1549     if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1550         dir = datadir_path_buffer;
1551     }
1552 # endif
1553 
1554 #else
1555     dir = getenv("ICU_TIMEZONE_FILES_DIR");
1556 #endif // U_PLATFORM_HAS_WINUWP_API
1557 
1558 #if defined(U_TIMEZONE_FILES_DIR)
1559     if (dir == nullptr) {
1560         // Build time configuration setting.
1561         dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1562     }
1563 #endif
1564 
1565     if (dir == nullptr) {
1566         dir = "";
1567     }
1568 
1569 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1570     if (prefix != nullptr) {
1571         snprintf(timezonefilesdir_path_buffer, sizeof(timezonefilesdir_path_buffer), "%s%s", prefix, dir);
1572         dir = timezonefilesdir_path_buffer;
1573     }
1574 #endif
1575 
1576     setTimeZoneFilesDir(dir, status);
1577 }
1578 
1579 
1580 U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode * status)1581 u_getTimeZoneFilesDirectory(UErrorCode *status) {
1582     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1583     return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1584 }
1585 
1586 U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char * path,UErrorCode * status)1587 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1588     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1589     setTimeZoneFilesDir(path, *status);
1590 
1591     // Note: this function does some extra churn, first setting based on the
1592     //       environment, then immediately replacing with the value passed in.
1593     //       The logic is simpler that way, and performance shouldn't be an issue.
1594 }
1595 
1596 
1597 #if U_POSIX_LOCALE
1598 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1599  * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1600  * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1601  */
uprv_getPOSIXIDForCategory(int category)1602 static const char *uprv_getPOSIXIDForCategory(int category)
1603 {
1604     const char* posixID = nullptr;
1605     if (category == LC_MESSAGES || category == LC_CTYPE) {
1606         /*
1607         * On Solaris two different calls to setlocale can result in
1608         * different values. Only get this value once.
1609         *
1610         * We must check this first because an application can set this.
1611         *
1612         * LC_ALL can't be used because it's platform dependent. The LANG
1613         * environment variable seems to affect LC_CTYPE variable by default.
1614         * Here is what setlocale(LC_ALL, nullptr) can return.
1615         * HPUX can return 'C C C C C C C'
1616         * Solaris can return /en_US/C/C/C/C/C on the second try.
1617         * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1618         *
1619         * The default codepage detection also needs to use LC_CTYPE.
1620         *
1621         * Do not call setlocale(LC_*, "")! Using an empty string instead
1622         * of nullptr, will modify the libc behavior.
1623         */
1624         posixID = setlocale(category, nullptr);
1625         if ((posixID == 0)
1626             || (uprv_strcmp("C", posixID) == 0)
1627             || (uprv_strcmp("POSIX", posixID) == 0))
1628         {
1629             /* Maybe we got some garbage.  Try something more reasonable */
1630             posixID = getenv("LC_ALL");
1631             /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
1632              * This is needed to properly handle empty env. variables
1633              */
1634 #if U_PLATFORM == U_PF_SOLARIS
1635             if ((posixID == 0) || (posixID[0] == '\0')) {
1636                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1637                 if ((posixID == 0) || (posixID[0] == '\0')) {
1638 #else
1639             if (posixID == 0) {
1640                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1641                 if (posixID == 0) {
1642 #endif
1643                     posixID = getenv("LANG");
1644                 }
1645             }
1646         }
1647     }
1648     if ((posixID==0)
1649         || (uprv_strcmp("C", posixID) == 0)
1650         || (uprv_strcmp("POSIX", posixID) == 0))
1651     {
1652         /* Nothing worked.  Give it a nice POSIX default value. */
1653         posixID = "en_US_POSIX";
1654         // Note: this test will not catch 'C.UTF-8',
1655         // that will be handled in uprv_getDefaultLocaleID().
1656         // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1657         // caller which expects to see "en_US_POSIX" in many branches.
1658     }
1659     return posixID;
1660 }
1661 
1662 /* Return just the POSIX id for the default locale, whatever happens to be in
1663  * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1664  */
1665 static const char *uprv_getPOSIXIDForDefaultLocale()
1666 {
1667     static const char* posixID = nullptr;
1668     if (posixID == 0) {
1669         posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1670     }
1671     return posixID;
1672 }
1673 
1674 #if !U_CHARSET_IS_UTF8
1675 /* Return just the POSIX id for the default codepage, whatever happens to be in
1676  * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1677  */
1678 static const char *uprv_getPOSIXIDForDefaultCodepage()
1679 {
1680     static const char* posixID = nullptr;
1681     if (posixID == 0) {
1682         posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1683     }
1684     return posixID;
1685 }
1686 #endif
1687 #endif
1688 
1689 /* NOTE: The caller should handle thread safety */
1690 U_CAPI const char* U_EXPORT2
1691 uprv_getDefaultLocaleID()
1692 {
1693 #if U_POSIX_LOCALE
1694 /*
1695   Note that:  (a '!' means the ID is improper somehow)
1696      LC_ALL  ---->     default_loc          codepage
1697 --------------------------------------------------------
1698      ab.CD             ab                   CD
1699      ab@CD             ab__CD               -
1700      ab@CD.EF          ab__CD               EF
1701 
1702      ab_CD.EF@GH       ab_CD_GH             EF
1703 
1704 Some 'improper' ways to do the same as above:
1705   !  ab_CD@GH.EF       ab_CD_GH             EF
1706   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1707   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1708 
1709      _CD@GH            _CD_GH               -
1710      _CD.EF@GH         _CD_GH               EF
1711 
1712 The variant cannot have dots in it.
1713 The 'rightmost' variant (@xxx) wins.
1714 The leftmost codepage (.xxx) wins.
1715 */
1716     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1717 
1718     /* Format: (no spaces)
1719     ll [ _CC ] [ . MM ] [ @ VV]
1720 
1721       l = lang, C = ctry, M = charmap, V = variant
1722     */
1723 
1724     if (gCorrectedPOSIXLocale != nullptr) {
1725         return gCorrectedPOSIXLocale;
1726     }
1727 
1728     // Copy the ID into owned memory.
1729     // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1730     char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1731     if (correctedPOSIXLocale == nullptr) {
1732         return nullptr;
1733     }
1734     uprv_strcpy(correctedPOSIXLocale, posixID);
1735 
1736     char *limit;
1737     if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1738         *limit = 0;
1739     }
1740     if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1741         *limit = 0;
1742     }
1743 
1744     if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1745         || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1746       // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1747       // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1748       uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1749     }
1750 
1751     /* Note that we scan the *uncorrected* ID. */
1752     const char *p;
1753     if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1754         p++;
1755 
1756         /* Take care of any special cases here.. */
1757         if (!uprv_strcmp(p, "nynorsk")) {
1758             p = "NY";
1759             /* Don't worry about no__NY. In practice, it won't appear. */
1760         }
1761 
1762         if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1763             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1764         }
1765         else {
1766             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1767         }
1768 
1769         const char *q;
1770         if ((q = uprv_strchr(p, '.')) != nullptr) {
1771             /* How big will the resulting string be? */
1772             int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1773             uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1774             correctedPOSIXLocale[len] = 0;
1775         }
1776         else {
1777             /* Anything following the @ sign */
1778             uprv_strcat(correctedPOSIXLocale, p);
1779         }
1780 
1781         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1782          * How about 'russian' -> 'ru'?
1783          * Many of the other locales using ISO codes will be handled by the
1784          * canonicalization functions in uloc_getDefault.
1785          */
1786     }
1787 
1788     if (gCorrectedPOSIXLocale == nullptr) {
1789         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1790         gCorrectedPOSIXLocaleHeapAllocated = true;
1791         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1792         correctedPOSIXLocale = nullptr;
1793     }
1794     posixID = gCorrectedPOSIXLocale;
1795 
1796     if (correctedPOSIXLocale != nullptr) {  /* Was already set - clean up. */
1797         uprv_free(correctedPOSIXLocale);
1798     }
1799 
1800     return posixID;
1801 
1802 #elif U_PLATFORM_USES_ONLY_WIN32_API
1803 #define POSIX_LOCALE_CAPACITY 64
1804     UErrorCode status = U_ZERO_ERROR;
1805     char *correctedPOSIXLocale = nullptr;
1806 
1807     // If we have already figured this out just use the cached value
1808     if (gCorrectedPOSIXLocale != nullptr) {
1809         return gCorrectedPOSIXLocale;
1810     }
1811 
1812     // No cached value, need to determine the current value
1813     static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1814     int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1815 
1816     // Now we should have a Windows locale name that needs converted to the POSIX style.
1817     if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1818     {
1819         // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1820         char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1821 
1822         int32_t i;
1823         for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1824         {
1825             if (windowsLocale[i] == '_')
1826             {
1827                 modifiedWindowsLocale[i] = '-';
1828             }
1829             else
1830             {
1831                 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1832             }
1833 
1834             if (modifiedWindowsLocale[i] == '\0')
1835             {
1836                 break;
1837             }
1838         }
1839 
1840         if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1841         {
1842             // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1843             // locale when tags are dropped
1844             modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1845         }
1846 
1847         // Now normalize the resulting name
1848         correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1849         /* TODO: Should we just exit on memory allocation failure? */
1850         if (correctedPOSIXLocale)
1851         {
1852             int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1853             if (U_SUCCESS(status))
1854             {
1855                 *(correctedPOSIXLocale + posixLen) = 0;
1856                 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1857                 gCorrectedPOSIXLocaleHeapAllocated = true;
1858                 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1859             }
1860             else
1861             {
1862                 uprv_free(correctedPOSIXLocale);
1863             }
1864         }
1865     }
1866 
1867     // If unable to find a locale we can agree upon, use en-US by default
1868     if (gCorrectedPOSIXLocale == nullptr) {
1869         gCorrectedPOSIXLocale = "en_US";
1870     }
1871     return gCorrectedPOSIXLocale;
1872 
1873 #elif U_PLATFORM == U_PF_OS400
1874     /* locales are process scoped and are by definition thread safe */
1875     static char correctedLocale[64];
1876     const  char *localeID = getenv("LC_ALL");
1877            char *p;
1878 
1879     if (localeID == nullptr)
1880         localeID = getenv("LANG");
1881     if (localeID == nullptr)
1882         localeID = setlocale(LC_ALL, nullptr);
1883     /* Make sure we have something... */
1884     if (localeID == nullptr)
1885         return "en_US_POSIX";
1886 
1887     /* Extract the locale name from the path. */
1888     if((p = uprv_strrchr(localeID, '/')) != nullptr)
1889     {
1890         /* Increment p to start of locale name. */
1891         p++;
1892         localeID = p;
1893     }
1894 
1895     /* Copy to work location. */
1896     uprv_strcpy(correctedLocale, localeID);
1897 
1898     /* Strip off the '.locale' extension. */
1899     if((p = uprv_strchr(correctedLocale, '.')) != nullptr) {
1900         *p = 0;
1901     }
1902 
1903     /* Upper case the locale name. */
1904     T_CString_toUpperCase(correctedLocale);
1905 
1906     /* See if we are using the POSIX locale.  Any of the
1907     * following are equivalent and use the same QLGPGCMA
1908     * (POSIX) locale.
1909     * QLGPGCMA2 means UCS2
1910     * QLGPGCMA_4 means UTF-32
1911     * QLGPGCMA_8 means UTF-8
1912     */
1913     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1914         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1915         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1916     {
1917         uprv_strcpy(correctedLocale, "en_US_POSIX");
1918     }
1919     else
1920     {
1921         int16_t LocaleLen;
1922 
1923         /* Lower case the lang portion. */
1924         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1925         {
1926             *p = uprv_tolower(*p);
1927         }
1928 
1929         /* Adjust for Euro.  After '_E' add 'URO'. */
1930         LocaleLen = uprv_strlen(correctedLocale);
1931         if (correctedLocale[LocaleLen - 2] == '_' &&
1932             correctedLocale[LocaleLen - 1] == 'E')
1933         {
1934             uprv_strcat(correctedLocale, "URO");
1935         }
1936 
1937         /* If using Lotus-based locale then convert to
1938          * equivalent non Lotus.
1939          */
1940         else if (correctedLocale[LocaleLen - 2] == '_' &&
1941             correctedLocale[LocaleLen - 1] == 'L')
1942         {
1943             correctedLocale[LocaleLen - 2] = 0;
1944         }
1945 
1946         /* There are separate simplified and traditional
1947          * locales called zh_HK_S and zh_HK_T.
1948          */
1949         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1950         {
1951             uprv_strcpy(correctedLocale, "zh_HK");
1952         }
1953 
1954         /* A special zh_CN_GBK locale...
1955         */
1956         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1957         {
1958             uprv_strcpy(correctedLocale, "zh_CN");
1959         }
1960 
1961     }
1962 
1963     return correctedLocale;
1964 #endif
1965 
1966 }
1967 
1968 #if !U_CHARSET_IS_UTF8
1969 #if U_POSIX_LOCALE
1970 /*
1971 Due to various platform differences, one platform may specify a charset,
1972 when they really mean a different charset. Remap the names so that they are
1973 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1974 here. Before adding anything to this function, please consider adding unique
1975 names to the ICU alias table in the data directory.
1976 */
1977 static const char*
1978 remapPlatformDependentCodepage(const char *locale, const char *name) {
1979     if (locale != nullptr && *locale == 0) {
1980         /* Make sure that an empty locale is handled the same way. */
1981         locale = nullptr;
1982     }
1983     if (name == nullptr) {
1984         return nullptr;
1985     }
1986 #if U_PLATFORM == U_PF_AIX
1987     if (uprv_strcmp(name, "IBM-943") == 0) {
1988         /* Use the ASCII compatible ibm-943 */
1989         name = "Shift-JIS";
1990     }
1991     else if (uprv_strcmp(name, "IBM-1252") == 0) {
1992         /* Use the windows-1252 that contains the Euro */
1993         name = "IBM-5348";
1994     }
1995 #elif U_PLATFORM == U_PF_SOLARIS
1996     if (locale != nullptr && uprv_strcmp(name, "EUC") == 0) {
1997         /* Solaris underspecifies the "EUC" name. */
1998         if (uprv_strcmp(locale, "zh_CN") == 0) {
1999             name = "EUC-CN";
2000         }
2001         else if (uprv_strcmp(locale, "zh_TW") == 0) {
2002             name = "EUC-TW";
2003         }
2004         else if (uprv_strcmp(locale, "ko_KR") == 0) {
2005             name = "EUC-KR";
2006         }
2007     }
2008     else if (uprv_strcmp(name, "eucJP") == 0) {
2009         /*
2010         ibm-954 is the best match.
2011         ibm-33722 is the default for eucJP (similar to Windows).
2012         */
2013         name = "eucjis";
2014     }
2015     else if (uprv_strcmp(name, "646") == 0) {
2016         /*
2017          * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
2018          * ISO-8859-1 instead of US-ASCII(646).
2019          */
2020         name = "ISO-8859-1";
2021     }
2022 #elif U_PLATFORM_IS_DARWIN_BASED
2023     if (locale == nullptr && *name == 0) {
2024         /*
2025         No locale was specified, and an empty name was passed in.
2026         This usually indicates that nl_langinfo didn't return valid information.
2027         Mac OS X uses UTF-8 by default (especially the locale data and console).
2028         */
2029         name = "UTF-8";
2030     }
2031     else if (uprv_strcmp(name, "CP949") == 0) {
2032         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2033         name = "EUC-KR";
2034     }
2035     else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2036         /*
2037          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2038          */
2039         name = "UTF-8";
2040     }
2041 #elif U_PLATFORM == U_PF_BSD
2042     if (uprv_strcmp(name, "CP949") == 0) {
2043         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2044         name = "EUC-KR";
2045     }
2046 #elif U_PLATFORM == U_PF_HPUX
2047     if (locale != nullptr && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2048         /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2049         /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2050         name = "hkbig5";
2051     }
2052     else if (uprv_strcmp(name, "eucJP") == 0) {
2053         /*
2054         ibm-1350 is the best match, but unavailable.
2055         ibm-954 is mostly a superset of ibm-1350.
2056         ibm-33722 is the default for eucJP (similar to Windows).
2057         */
2058         name = "eucjis";
2059     }
2060 #elif U_PLATFORM == U_PF_LINUX
2061     if (locale != nullptr && uprv_strcmp(name, "euc") == 0) {
2062         /* Linux underspecifies the "EUC" name. */
2063         if (uprv_strcmp(locale, "korean") == 0) {
2064             name = "EUC-KR";
2065         }
2066         else if (uprv_strcmp(locale, "japanese") == 0) {
2067             /* See comment below about eucJP */
2068             name = "eucjis";
2069         }
2070     }
2071     else if (uprv_strcmp(name, "eucjp") == 0) {
2072         /*
2073         ibm-1350 is the best match, but unavailable.
2074         ibm-954 is mostly a superset of ibm-1350.
2075         ibm-33722 is the default for eucJP (similar to Windows).
2076         */
2077         name = "eucjis";
2078     }
2079     else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2080             (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2081         /*
2082          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2083          */
2084         name = "UTF-8";
2085     }
2086     /*
2087      * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2088      * it by falling back to 'US-ASCII' when nullptr is returned from this
2089      * function. So, we don't have to worry about it here.
2090      */
2091 #endif
2092     /* return nullptr when "" is passed in */
2093     if (*name == 0) {
2094         name = nullptr;
2095     }
2096     return name;
2097 }
2098 
2099 static const char*
2100 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2101 {
2102     char localeBuf[100];
2103     const char *name = nullptr;
2104     char *variant = nullptr;
2105 
2106     if (localeName != nullptr && (name = (uprv_strchr(localeName, '.'))) != nullptr) {
2107         size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2108         uprv_strncpy(localeBuf, localeName, localeCapacity);
2109         localeBuf[localeCapacity-1] = 0; /* ensure NUL termination */
2110         name = uprv_strncpy(buffer, name+1, buffCapacity);
2111         buffer[buffCapacity-1] = 0; /* ensure NUL termination */
2112         if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != nullptr) {
2113             *variant = 0;
2114         }
2115         name = remapPlatformDependentCodepage(localeBuf, name);
2116     }
2117     return name;
2118 }
2119 #endif
2120 
2121 static const char*
2122 int_getDefaultCodepage()
2123 {
2124 #if U_PLATFORM == U_PF_OS400
2125     uint32_t ccsid = 37; /* Default to ibm-37 */
2126     static char codepage[64];
2127     Qwc_JOBI0400_t jobinfo;
2128     Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2129 
2130     EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2131         "*                         ", "                ", &error);
2132 
2133     if (error.Bytes_Available == 0) {
2134         if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2135             ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2136         }
2137         else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2138             ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2139         }
2140         /* else use the default */
2141     }
2142     snprintf(codepage, sizeof(codepage), "ibm-%d", ccsid);
2143     return codepage;
2144 
2145 #elif U_PLATFORM == U_PF_OS390
2146     static char codepage[64];
2147 
2148     strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2149     strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
2150     codepage[63] = 0; /* NUL terminate */
2151 
2152     return codepage;
2153 
2154 #elif U_PLATFORM_USES_ONLY_WIN32_API
2155     static char codepage[64];
2156     DWORD codepageNumber = 0;
2157 
2158 #if U_PLATFORM_HAS_WINUWP_API == 1
2159     // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2160     // have folks use Unicode than a "system" code page, however this is the same
2161     // codepage as the system default locale codepage.  (FWIW, the system locale is
2162     // ONLY used for codepage, it should never be used for anything else)
2163     GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2164         (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2165 #else
2166     // Win32 apps can call GetACP
2167     codepageNumber = GetACP();
2168 #endif
2169     // Special case for UTF-8
2170     if (codepageNumber == 65001)
2171     {
2172         return "UTF-8";
2173     }
2174     // Windows codepages can look like windows-1252, so format the found number
2175     // the numbers are eclectic, however all valid system code pages, besides UTF-8
2176     // are between 3 and 19999
2177     if (codepageNumber > 0 && codepageNumber < 20000)
2178     {
2179         snprintf(codepage, sizeof(codepage), "windows-%ld", codepageNumber);
2180         return codepage;
2181     }
2182     // If the codepage number call failed then return UTF-8
2183     return "UTF-8";
2184 
2185 #elif U_POSIX_LOCALE
2186     static char codesetName[100];
2187     const char *localeName = nullptr;
2188     const char *name = nullptr;
2189 
2190     localeName = uprv_getPOSIXIDForDefaultCodepage();
2191     uprv_memset(codesetName, 0, sizeof(codesetName));
2192     /* On Solaris nl_langinfo returns C locale values unless setlocale
2193      * was called earlier.
2194      */
2195 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2196     /* When available, check nl_langinfo first because it usually gives more
2197        useful names. It depends on LC_CTYPE.
2198        nl_langinfo may use the same buffer as setlocale. */
2199     {
2200         const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
2201 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2202         /*
2203          * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2204          * instead of ASCII.
2205          */
2206         if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2207             codeset = remapPlatformDependentCodepage(localeName, codeset);
2208         } else
2209 #endif
2210         {
2211             codeset = remapPlatformDependentCodepage(nullptr, codeset);
2212         }
2213 
2214         if (codeset != nullptr) {
2215             uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2216             codesetName[sizeof(codesetName)-1] = 0;
2217             return codesetName;
2218         }
2219     }
2220 #endif
2221 
2222     /* Use setlocale in a nice way, and then check some environment variables.
2223        Maybe the application used setlocale already.
2224     */
2225     uprv_memset(codesetName, 0, sizeof(codesetName));
2226     name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2227     if (name) {
2228         /* if we can find the codeset name from setlocale, return that. */
2229         return name;
2230     }
2231 
2232     if (*codesetName == 0)
2233     {
2234         /* Everything failed. Return US ASCII (ISO 646). */
2235         (void)uprv_strcpy(codesetName, "US-ASCII");
2236     }
2237     return codesetName;
2238 #else
2239     return "US-ASCII";
2240 #endif
2241 }
2242 
2243 
2244 U_CAPI const char*  U_EXPORT2
2245 uprv_getDefaultCodepage()
2246 {
2247     static char const  *name = nullptr;
2248     umtx_lock(nullptr);
2249     if (name == nullptr) {
2250         name = int_getDefaultCodepage();
2251     }
2252     umtx_unlock(nullptr);
2253     return name;
2254 }
2255 #endif  /* !U_CHARSET_IS_UTF8 */
2256 
2257 
2258 /* end of platform-specific implementation -------------- */
2259 
2260 /* version handling --------------------------------------------------------- */
2261 
2262 U_CAPI void U_EXPORT2
2263 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2264     char *end;
2265     uint16_t part=0;
2266 
2267     if(versionArray==nullptr) {
2268         return;
2269     }
2270 
2271     if(versionString!=nullptr) {
2272         for(;;) {
2273             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2274             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2275                 break;
2276             }
2277             versionString=end+1;
2278         }
2279     }
2280 
2281     while(part<U_MAX_VERSION_LENGTH) {
2282         versionArray[part++]=0;
2283     }
2284 }
2285 
2286 U_CAPI void U_EXPORT2
2287 u_versionFromUString(UVersionInfo versionArray, const char16_t *versionString) {
2288     if(versionArray!=nullptr && versionString!=nullptr) {
2289         char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2290         int32_t len = u_strlen(versionString);
2291         if(len>U_MAX_VERSION_STRING_LENGTH) {
2292             len = U_MAX_VERSION_STRING_LENGTH;
2293         }
2294         u_UCharsToChars(versionString, versionChars, len);
2295         versionChars[len]=0;
2296         u_versionFromString(versionArray, versionChars);
2297     }
2298 }
2299 
2300 U_CAPI void U_EXPORT2
2301 u_versionToString(const UVersionInfo versionArray, char *versionString) {
2302     uint16_t count, part;
2303     uint8_t field;
2304 
2305     if(versionString==nullptr) {
2306         return;
2307     }
2308 
2309     if(versionArray==nullptr) {
2310         versionString[0]=0;
2311         return;
2312     }
2313 
2314     /* count how many fields need to be written */
2315     for(count=4; count>0 && versionArray[count-1]==0; --count) {
2316     }
2317 
2318     if(count <= 1) {
2319         count = 2;
2320     }
2321 
2322     /* write the first part */
2323     /* write the decimal field value */
2324     field=versionArray[0];
2325     if(field>=100) {
2326         *versionString++=(char)('0'+field/100);
2327         field%=100;
2328     }
2329     if(field>=10) {
2330         *versionString++=(char)('0'+field/10);
2331         field%=10;
2332     }
2333     *versionString++=(char)('0'+field);
2334 
2335     /* write the following parts */
2336     for(part=1; part<count; ++part) {
2337         /* write a dot first */
2338         *versionString++=U_VERSION_DELIMITER;
2339 
2340         /* write the decimal field value */
2341         field=versionArray[part];
2342         if(field>=100) {
2343             *versionString++=(char)('0'+field/100);
2344             field%=100;
2345         }
2346         if(field>=10) {
2347             *versionString++=(char)('0'+field/10);
2348             field%=10;
2349         }
2350         *versionString++=(char)('0'+field);
2351     }
2352 
2353     /* NUL-terminate */
2354     *versionString=0;
2355 }
2356 
2357 U_CAPI void U_EXPORT2
2358 u_getVersion(UVersionInfo versionArray) {
2359     (void)copyright;   // Suppress unused variable warning from clang.
2360     u_versionFromString(versionArray, U_ICU_VERSION);
2361 }
2362 
2363 /**
2364  * icucfg.h dependent code
2365  */
2366 
2367 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2368 
2369 #if HAVE_DLFCN_H
2370 #ifdef __MVS__
2371 #ifndef __SUSV3
2372 #define __SUSV3 1
2373 #endif
2374 #endif
2375 #include <dlfcn.h>
2376 #endif /* HAVE_DLFCN_H */
2377 
2378 U_CAPI void * U_EXPORT2
2379 uprv_dl_open(const char *libName, UErrorCode *status) {
2380   void *ret = nullptr;
2381   if(U_FAILURE(*status)) return ret;
2382   ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2383   if(ret==nullptr) {
2384 #ifdef U_TRACE_DYLOAD
2385     printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2386 #endif
2387     *status = U_MISSING_RESOURCE_ERROR;
2388   }
2389   return ret;
2390 }
2391 
2392 U_CAPI void U_EXPORT2
2393 uprv_dl_close(void *lib, UErrorCode *status) {
2394   if(U_FAILURE(*status)) return;
2395   dlclose(lib);
2396 }
2397 
2398 U_CAPI UVoidFunction* U_EXPORT2
2399 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2400   union {
2401       UVoidFunction *fp;
2402       void *vp;
2403   } uret;
2404   uret.fp = nullptr;
2405   if(U_FAILURE(*status)) return uret.fp;
2406   uret.vp = dlsym(lib, sym);
2407   if(uret.vp == nullptr) {
2408 #ifdef U_TRACE_DYLOAD
2409     printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2410 #endif
2411     *status = U_MISSING_RESOURCE_ERROR;
2412   }
2413   return uret.fp;
2414 }
2415 
2416 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2417 
2418 /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2420 
2421 U_CAPI void * U_EXPORT2
2422 uprv_dl_open(const char *libName, UErrorCode *status) {
2423   HMODULE lib = nullptr;
2424 
2425   if(U_FAILURE(*status)) return nullptr;
2426 
2427   lib = LoadLibraryA(libName);
2428 
2429   if(lib==nullptr) {
2430     *status = U_MISSING_RESOURCE_ERROR;
2431   }
2432 
2433   return (void*)lib;
2434 }
2435 
2436 U_CAPI void U_EXPORT2
2437 uprv_dl_close(void *lib, UErrorCode *status) {
2438   HMODULE handle = (HMODULE)lib;
2439   if(U_FAILURE(*status)) return;
2440 
2441   FreeLibrary(handle);
2442 
2443   return;
2444 }
2445 
2446 U_CAPI UVoidFunction* U_EXPORT2
2447 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2448   HMODULE handle = (HMODULE)lib;
2449   UVoidFunction* addr = nullptr;
2450 
2451   if(U_FAILURE(*status) || lib==nullptr) return nullptr;
2452 
2453   addr = (UVoidFunction*)GetProcAddress(handle, sym);
2454 
2455   if(addr==nullptr) {
2456     DWORD lastError = GetLastError();
2457     if(lastError == ERROR_PROC_NOT_FOUND) {
2458       *status = U_MISSING_RESOURCE_ERROR;
2459     } else {
2460       *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2461     }
2462   }
2463 
2464   return addr;
2465 }
2466 
2467 #else
2468 
2469 /* No dynamic loading, null (nonexistent) implementation. */
2470 
2471 U_CAPI void * U_EXPORT2
2472 uprv_dl_open(const char *libName, UErrorCode *status) {
2473     (void)libName;
2474     if(U_FAILURE(*status)) return nullptr;
2475     *status = U_UNSUPPORTED_ERROR;
2476     return nullptr;
2477 }
2478 
2479 U_CAPI void U_EXPORT2
2480 uprv_dl_close(void *lib, UErrorCode *status) {
2481     (void)lib;
2482     if(U_FAILURE(*status)) return;
2483     *status = U_UNSUPPORTED_ERROR;
2484     return;
2485 }
2486 
2487 U_CAPI UVoidFunction* U_EXPORT2
2488 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2489   (void)lib;
2490   (void)sym;
2491   if(U_SUCCESS(*status)) {
2492     *status = U_UNSUPPORTED_ERROR;
2493   }
2494   return (UVoidFunction*)nullptr;
2495 }
2496 
2497 #endif
2498 
2499 /*
2500  * Hey, Emacs, please set the following:
2501  *
2502  * Local Variables:
2503  * indent-tabs-mode: nil
2504  * End:
2505  *
2506  */
2507