• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1997-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12 *
13 *   Date        Name        Description
14 *   04/14/97    aliu        Creation.
15 *   04/24/97    aliu        Added getDefaultDataDirectory() and
16 *                            getDefaultLocaleID().
17 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
18 *                            for assumed case.  Non-UNIX platforms must be
19 *                            special-cased.  Rewrote numeric methods dealing
20 *                            with NaN and Infinity to be platform independent
21 *                             over all IEEE 754 platforms.
22 *   05/13/97    aliu        Restored sign of timezone
23 *                            (semantics are hours West of GMT)
24 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25 *                             nextDouble..
26 *   07/22/98    stephen     Added remainder, max, min, trunc
27 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
28 *   08/24/98    stephen     Added longBitsFromDouble
29 *   09/08/98    stephen     Minor changes for Mac Port
30 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
31 *                            Fixed EBCDIC tables
32 *   04/15/99    stephen     Converted to C.
33 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
34 *   08/04/99    jeffrey R.  Added OS/2 changes
35 *   11/15/99    helena      Integrated S/390 IEEE support.
36 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
37 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
38 *   01/03/08    Steven L.   Fake Time Support
39 ******************************************************************************
40 */
41 
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
45 
46 // First, the platform type. Need this for U_PLATFORM.
47 #include "unicode/platform.h"
48 
49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50 /* tzset isn't defined in strict ANSI on MinGW. */
51 #undef __STRICT_ANSI__
52 #endif
53 
/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
 */
57 #include <time.h>
58 
59 #if !U_PLATFORM_USES_ONLY_WIN32_API
60 #include <sys/time.h>
61 #endif
62 
63 /* include the rest of the ICU headers */
64 #include "unicode/putil.h"
65 #include "unicode/ustring.h"
66 #include "putilimp.h"
67 #include "uassert.h"
68 #include "umutex.h"
69 #include "cmemory.h"
70 #include "cstring.h"
71 #include "locmap.h"
72 #include "ucln_cmn.h"
73 #include "charstr.h"
74 
75 /* Include standard headers. */
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <math.h>
80 #include <locale.h>
81 #include <float.h>
82 
83 #ifndef U_COMMON_IMPLEMENTATION
84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
85 #endif
86 
87 
88 /* include system headers */
89 #if U_PLATFORM_USES_ONLY_WIN32_API
90     /*
91      * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92      * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93      * to use native APIs as much as possible?
94      */
95 #ifndef WIN32_LEAN_AND_MEAN
96 #   define WIN32_LEAN_AND_MEAN
97 #endif
98 #   define VC_EXTRALEAN
99 #   define NOUSER
100 #   define NOSERVICE
101 #   define NOIME
102 #   define NOMCX
103 #   include <windows.h>
104 #   include "unicode/uloc.h"
105 #   include "wintz.h"
106 #elif U_PLATFORM == U_PF_OS400
107 #   include <float.h>
108 #   include <qusec.h>       /* error code structure */
109 #   include <qusrjobi.h>
110 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
111 #   include <mih/testptr.h> /* For uprv_maximumPtr */
112 #elif U_PLATFORM == U_PF_OS390
113 #   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
114 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
115 #   include <limits.h>
116 #   include <unistd.h>
117 #   if U_PLATFORM == U_PF_SOLARIS
118 #       ifndef _XPG4_2
119 #           define _XPG4_2
120 #       endif
121 #   elif U_PLATFORM == U_PF_ANDROID
122 #       include <sys/system_properties.h>
123 #       include <dlfcn.h>
124 #   endif
125 #elif U_PLATFORM == U_PF_QNX
126 #   include <sys/neutrino.h>
127 #endif
128 
129 
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
135 
136 #if U_HAVE_NL_LANGINFO_CODESET
137 #include <langinfo.h>
138 #endif
139 
140 /**
141  * Simple things (presence of functions, etc) should just go in configure.in and be added to
142  * icucfg.h via autoheader.
143  */
144 #if U_PLATFORM_IMPLEMENTS_POSIX
145 #   if U_PLATFORM == U_PF_OS400
146 #    define HAVE_DLFCN_H 0
147 #    define HAVE_DLOPEN 0
148 #   else
149 #   ifndef HAVE_DLFCN_H
150 #    define HAVE_DLFCN_H 1
151 #   endif
152 #   ifndef HAVE_DLOPEN
153 #    define HAVE_DLOPEN 1
154 #   endif
155 #   endif
156 #   ifndef HAVE_GETTIMEOFDAY
157 #    define HAVE_GETTIMEOFDAY 1
158 #   endif
159 #else
160 #   define HAVE_DLFCN_H 0
161 #   define HAVE_DLOPEN 0
162 #   define HAVE_GETTIMEOFDAY 0
163 #endif
164 
165 U_NAMESPACE_USE
166 
167 /* Define the extension for data files, again... */
168 #define DATA_TYPE "dat"
169 
170 /* Leave this copyright notice here! */
171 static const char copyright[] = U_COPYRIGHT_STRING;
172 
173 /* floating point implementations ------------------------------------------- */
174 
175 /* We return QNAN rather than SNAN*/
176 #define SIGN 0x80000000U
177 
/*
 * Make it easy to define certain types of constants: a 64-bit integer and a
 * double share the same storage, so a double constant can be written as its
 * raw bit pattern and read back through d64.
 */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern 0x7FF8000000000000; its d64 view is what uprv_getNaN() returns. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern 0x7FF0000000000000; its d64 view is what uprv_getInfinity() returns. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
185 
186 /*---------------------------------------------------------------------------
187   Platform utilities
188   Our general strategy is to assume we're on a POSIX platform.  Platforms which
189   are non-POSIX must declare themselves so.  The default POSIX implementation
190   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
191   functions).
192   ---------------------------------------------------------------------------*/
193 
194 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
195 #   undef U_POSIX_LOCALE
196 #else
197 #   define U_POSIX_LOCALE    1
198 #endif
199 
200 /*
201     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
202     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
203 */
204 #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d: the start of the
 * object on big-endian builds, the last n bytes on little-endian builds.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return (firstByte + sizeof(double)) - n;
#endif
}
214 
/*
 * Returns a pointer to the n least significant bytes of *d: the start of the
 * object on little-endian builds, the last n bytes on big-endian builds.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return (firstByte + sizeof(double)) - n;
#else
    return firstByte;
#endif
}
224 #endif   /* !IEEE_754 */
225 
226 #if IEEE_754
227 static UBool
u_signBit(double d)228 u_signBit(double d) {
229     uint8_t hiByte;
230 #if U_IS_BIG_ENDIAN
231     hiByte = *(uint8_t *)&d;
232 #else
233     hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
234 #endif
235     return (hiByte & 0x80) != 0;
236 }
237 #endif
238 
239 
240 
241 #if defined (U_DEBUG_FAKETIME)
/* Override the clock to test things without having to move the system clock.
 * Assumes POSIX gettimeofday() will function.
 * These three globals are initialized lazily by getUTCtime_fake() on its
 * first call, under that function's mutex.
 */
UDate fakeClock_t0 = 0; /** Time to start the clock from **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
248 
getUTCtime_real()249 static UDate getUTCtime_real() {
250     struct timeval posixTime;
251     gettimeofday(&posixTime, NULL);
252     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
253 }
254 
getUTCtime_fake()255 static UDate getUTCtime_fake() {
256     static UMutex fakeClockMutex;
257     umtx_lock(&fakeClockMutex);
258     if(!fakeClock_set) {
259         UDate real = getUTCtime_real();
260         const char *fake_start = getenv("U_FAKETIME_START");
261         if((fake_start!=NULL) && (fake_start[0]!=0)) {
262             sscanf(fake_start,"%lf",&fakeClock_t0);
263             fakeClock_dt = fakeClock_t0 - real;
264             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
265                     "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
266                     fakeClock_t0, fake_start, fakeClock_dt, real);
267         } else {
268           fakeClock_dt = 0;
269             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
270                     "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
271         }
272         fakeClock_set = TRUE;
273     }
274     umtx_unlock(&fakeClockMutex);
275 
276     return getUTCtime_real() + fakeClock_dt;
277 }
278 #endif
279 
280 #if U_PLATFORM_USES_ONLY_WIN32_API
/* Overlays a Win32 FILETIME with a 64-bit integer so the timestamp can be used in integer arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
289 
290 #endif
291 
292 /*---------------------------------------------------------------------------
293   Universal Implementations
294   These are designed to work on all platforms.  Try these, and if they
295   don't work on your platform, then special case your platform with new
296   implementations.
297 ---------------------------------------------------------------------------*/
298 
299 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()300 uprv_getUTCtime()
301 {
302 #if defined(U_DEBUG_FAKETIME)
303     return getUTCtime_fake(); /* Hook for overriding the clock */
304 #else
305     return uprv_getRawUTCtime();
306 #endif
307 }
308 
309 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
310 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()311 uprv_getRawUTCtime()
312 {
313 #if U_PLATFORM_USES_ONLY_WIN32_API
314 
315     FileTimeConversion winTime;
316     GetSystemTimeAsFileTime(&winTime.fileTime);
317     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
318 #else
319 
320 #if HAVE_GETTIMEOFDAY
321     struct timeval posixTime;
322     gettimeofday(&posixTime, NULL);
323     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
324 #else
325     time_t epochtime;
326     time(&epochtime);
327     return (UDate)epochtime * U_MILLIS_PER_SECOND;
328 #endif
329 
330 #endif
331 }
332 
333 /*-----------------------------------------------------------------------------
334   IEEE 754
335   These methods detect and return NaN and infinity values for doubles
336   conforming to IEEE 754.  Platforms which support this standard include X86,
337   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
338   If this doesn't work on your platform, you have non-IEEE floating-point, and
339   will need to code your own versions.  A naive implementation is to return 0.0
340   for getNaN and getInfinity, and false for isNaN and isInfinite.
341   ---------------------------------------------------------------------------*/
342 
343 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)344 uprv_isNaN(double number)
345 {
346 #if IEEE_754
347     BitPatternConversion convertedNumber;
348     convertedNumber.d64 = number;
349     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
350     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
351 
352 #elif U_PLATFORM == U_PF_OS390
353     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
354                         sizeof(uint32_t));
355     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
356                         sizeof(uint32_t));
357 
358     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
359       (lowBits == 0x00000000L);
360 
361 #else
362     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
363     /* you'll need to replace this default implementation with what's correct*/
364     /* for your platform.*/
365     return number != number;
366 #endif
367 }
368 
369 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)370 uprv_isInfinite(double number)
371 {
372 #if IEEE_754
373     BitPatternConversion convertedNumber;
374     convertedNumber.d64 = number;
375     /* Infinity is exactly 0x7FF0000000000000U. */
376     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
377 #elif U_PLATFORM == U_PF_OS390
378     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
379                         sizeof(uint32_t));
380     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
381                         sizeof(uint32_t));
382 
383     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
384 
385 #else
386     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
387     /* value, you'll need to replace this default implementation with what's*/
388     /* correct for your platform.*/
389     return number == (2.0 * number);
390 #endif
391 }
392 
393 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)394 uprv_isPositiveInfinity(double number)
395 {
396 #if IEEE_754 || U_PLATFORM == U_PF_OS390
397     return (UBool)(number > 0 && uprv_isInfinite(number));
398 #else
399     return uprv_isInfinite(number);
400 #endif
401 }
402 
403 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)404 uprv_isNegativeInfinity(double number)
405 {
406 #if IEEE_754 || U_PLATFORM == U_PF_OS390
407     return (UBool)(number < 0 && uprv_isInfinite(number));
408 
409 #else
410     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
411                         sizeof(uint32_t));
412     return((highBits & SIGN) && uprv_isInfinite(number));
413 
414 #endif
415 }
416 
417 U_CAPI double U_EXPORT2
uprv_getNaN()418 uprv_getNaN()
419 {
420 #if IEEE_754 || U_PLATFORM == U_PF_OS390
421     return gNan.d64;
422 #else
423     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
424     /* you'll need to replace this default implementation with what's correct*/
425     /* for your platform.*/
426     return 0.0;
427 #endif
428 }
429 
430 U_CAPI double U_EXPORT2
uprv_getInfinity()431 uprv_getInfinity()
432 {
433 #if IEEE_754 || U_PLATFORM == U_PF_OS390
434     return gInf.d64;
435 #else
436     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
437     /* value, you'll need to replace this default implementation with what's*/
438     /* correct for your platform.*/
439     return 0.0;
440 #endif
441 }
442 
443 U_CAPI double U_EXPORT2
uprv_floor(double x)444 uprv_floor(double x)
445 {
446     return floor(x);
447 }
448 
449 U_CAPI double U_EXPORT2
uprv_ceil(double x)450 uprv_ceil(double x)
451 {
452     return ceil(x);
453 }
454 
455 U_CAPI double U_EXPORT2
uprv_round(double x)456 uprv_round(double x)
457 {
458     return uprv_floor(x + 0.5);
459 }
460 
461 U_CAPI double U_EXPORT2
uprv_fabs(double x)462 uprv_fabs(double x)
463 {
464     return fabs(x);
465 }
466 
467 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)468 uprv_modf(double x, double* y)
469 {
470     return modf(x, y);
471 }
472 
473 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)474 uprv_fmod(double x, double y)
475 {
476     return fmod(x, y);
477 }
478 
479 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)480 uprv_pow(double x, double y)
481 {
482     /* This is declared as "double pow(double x, double y)" */
483     return pow(x, y);
484 }
485 
486 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)487 uprv_pow10(int32_t x)
488 {
489     return pow(10.0, (double)x);
490 }
491 
492 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)493 uprv_fmax(double x, double y)
494 {
495 #if IEEE_754
496     /* first handle NaN*/
497     if(uprv_isNaN(x) || uprv_isNaN(y))
498         return uprv_getNaN();
499 
500     /* check for -0 and 0*/
501     if(x == 0.0 && y == 0.0 && u_signBit(x))
502         return y;
503 
504 #endif
505 
506     /* this should work for all flt point w/o NaN and Inf special cases */
507     return (x > y ? x : y);
508 }
509 
510 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)511 uprv_fmin(double x, double y)
512 {
513 #if IEEE_754
514     /* first handle NaN*/
515     if(uprv_isNaN(x) || uprv_isNaN(y))
516         return uprv_getNaN();
517 
518     /* check for -0 and 0*/
519     if(x == 0.0 && y == 0.0 && u_signBit(y))
520         return y;
521 
522 #endif
523 
524     /* this should work for all flt point w/o NaN and Inf special cases */
525     return (x > y ? y : x);
526 }
527 
528 U_CAPI UBool U_EXPORT2
uprv_add32_overflow(int32_t a,int32_t b,int32_t * res)529 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
530     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
531     // This function could be optimized by calling one of those primitives.
532     auto a64 = static_cast<int64_t>(a);
533     auto b64 = static_cast<int64_t>(b);
534     int64_t res64 = a64 + b64;
535     *res = static_cast<int32_t>(res64);
536     return res64 != *res;
537 }
538 
539 U_CAPI UBool U_EXPORT2
uprv_mul32_overflow(int32_t a,int32_t b,int32_t * res)540 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
541     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
542     // This function could be optimized by calling one of those primitives.
543     auto a64 = static_cast<int64_t>(a);
544     auto b64 = static_cast<int64_t>(b);
545     int64_t res64 = a64 * b64;
546     *res = static_cast<int32_t>(res64);
547     return res64 != *res;
548 }
549 
550 /**
551  * Truncates the given double.
552  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
553  * This is different than calling floor() or ceil():
554  * floor(3.3) = 3, floor(-3.3) = -4
555  * ceil(3.3) = 4, ceil(-3.3) = -3
556  */
557 U_CAPI double U_EXPORT2
uprv_trunc(double d)558 uprv_trunc(double d)
559 {
560 #if IEEE_754
561     /* handle error cases*/
562     if(uprv_isNaN(d))
563         return uprv_getNaN();
564     if(uprv_isInfinite(d))
565         return uprv_getInfinity();
566 
567     if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
568         return ceil(d);
569     else
570         return floor(d);
571 
572 #else
573     return d >= 0 ? floor(d) : ceil(d);
574 
575 #endif
576 }
577 
578 /**
579  * Return the largest positive number that can be represented by an integer
580  * type of arbitrary bit length.
581  */
582 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)583 uprv_maxMantissa(void)
584 {
585     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
586 }
587 
588 U_CAPI double U_EXPORT2
uprv_log(double d)589 uprv_log(double d)
590 {
591     return log(d);
592 }
593 
/*
 * Returns an upper-bound pointer for memory starting at base.
 * On OS/400 the bound is computed from the pointer kind (TERASPACE or not);
 * on every other platform the result is U_MAX_PTR(base).
 */
U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)
{
#if U_PLATFORM == U_PF_OS400
    /*
     * With the provided function we should never be out of range of a given segment
     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
     * id and 3 bytes for the offset.  The key is that the casting takes care of
     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
     * seen in a program is x001000 and when casted to an int would be 0.
     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
     *
     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
     * This function determines the activation based on the pointer that is passed in and
     * calculates the appropriate maximum available size for
     * each pointer type (TERASPACE and non-TERASPACE)
     *
     * Unlike other operating systems, the pointer model isn't determined at
     * compile time on i5/OS.
     */
    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
        /* if it is a TERASPACE pointer the max is 2GB - 4k */
        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
    }
    /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));

#else
    return U_MAX_PTR(base);
#endif
}
626 
627 /*---------------------------------------------------------------------------
628   Platform-specific Implementations
629   Try these, and if they don't work on your platform, then special case your
630   platform with new implementations.
631   ---------------------------------------------------------------------------*/
632 
633 /* Generic time zone layer -------------------------------------------------- */
634 
635 /* Time zone utilities */
636 U_CAPI void U_EXPORT2
uprv_tzset()637 uprv_tzset()
638 {
639 #if defined(U_TZSET)
640     U_TZSET();
641 #else
642     /* no initialization*/
643 #endif
644 }
645 
646 U_CAPI int32_t U_EXPORT2
uprv_timezone()647 uprv_timezone()
648 {
649 #ifdef U_TIMEZONE
650     return U_TIMEZONE;
651 #else
652     time_t t, t1, t2;
653     struct tm tmrec;
654     int32_t tdiff = 0;
655 
656     time(&t);
657     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
658 #if U_PLATFORM != U_PF_IPHONE
659     UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
660 #endif
661     t1 = mktime(&tmrec);                 /* local time in seconds*/
662     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
663     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
664     tdiff = t2 - t1;
665 
666 #if U_PLATFORM != U_PF_IPHONE
667     /* imitate NT behaviour, which returns same timezone offset to GMT for
668        winter and summer.
669        This does not work on all platforms. For instance, on glibc on Linux
670        and on Mac OS 10.5, tdiff calculated above remains the same
671        regardless of whether DST is in effect or not. iOS is another
672        platform where this does not work. Linux + glibc and Mac OS 10.5
673        have U_TIMEZONE defined so that this code is not reached.
674     */
675     if (dst_checked)
676         tdiff += 3600;
677 #endif
678     return tdiff;
679 #endif
680 }
681 
682 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
683    some platforms need to have it declared here. */
684 
685 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
686 /* RS6000 and others reject char **tzname.  */
687 extern U_IMPORT char *U_TZNAME[];
688 #endif
689 
690 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
691 /* These platforms are likely to use Olson timezone IDs. */
692 /* common targets of the symbolic link at TZDEFAULT are:
693  * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
694  * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
695  * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
696  * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
697  * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
698  * To avoid checking lots of paths, just check that the target path
699  * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
700  */
701 
702 #define CHECK_LOCALTIME_LINK 1
703 #if U_PLATFORM_IS_DARWIN_BASED
704 #include <tzfile.h>
705 #define TZZONEINFO      (TZDIR "/")
706 #elif U_PLATFORM == U_PF_SOLARIS
707 #define TZDEFAULT       "/etc/localtime"
708 #define TZZONEINFO      "/usr/share/lib/zoneinfo/"
709 #define TZ_ENV_CHECK    "localtime"
710 #else
711 #define TZDEFAULT       "/etc/localtime"
712 #define TZZONEINFO      "/usr/share/zoneinfo/"
713 #endif
714 #define TZZONEINFOTAIL  "/zoneinfo/"
715 #if U_HAVE_DIRENT_H
716 #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
717 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
718    symlinked to /etc/localtime, which makes searchForTZFile return
719    'localtime' when it's the first match. */
720 #define TZFILE_SKIP2    "localtime"
721 #define SEARCH_TZFILE
722 #include <dirent.h>  /* Needed to search through system timezone files */
723 #endif
724 static char gTimeZoneBuffer[PATH_MAX];
725 static char *gTimeZoneBufferPtr = NULL;
726 #endif
727 
728 #if !U_PLATFORM_USES_ONLY_WIN32_API
729 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)730 static UBool isValidOlsonID(const char *id) {
731     int32_t idx = 0;
732 
733     /* Determine if this is something like Iceland (Olson ID)
734     or AST4ADT (non-Olson ID) */
735     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
736         idx++;
737     }
738 
739     /* If we went through the whole string, then it might be okay.
740     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
741     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
742     The rest of the time it could be an Olson ID. George */
743     return (UBool)(id[idx] == 0
744         || uprv_strcmp(id, "PST8PDT") == 0
745         || uprv_strcmp(id, "MST7MDT") == 0
746         || uprv_strcmp(id, "CST6CDT") == 0
747         || uprv_strcmp(id, "EST5EDT") == 0);
748 }
749 
750 /* On some Unix-like OS, 'posix' subdirectory in
751    /usr/share/zoneinfo replicates the top-level contents. 'right'
752    subdirectory has the same set of files, but individual files
753    are different from those in the top-level directory or 'posix'
754    because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
755    has files for UTC.
756    When the first match for /etc/localtime is in either of them
757    (usually in posix because 'right' has different file contents),
758    or TZ environment variable points to one of them, createTimeZone
759    fails because, say, 'posix/America/New_York' is not an Olson
760    timezone id ('America/New_York' is). So, we have to skip
761    'posix/' and 'right/' at the beginning. */
skipZoneIDPrefix(const char ** id)762 static void skipZoneIDPrefix(const char** id) {
763     if (uprv_strncmp(*id, "posix/", 6) == 0
764         || uprv_strncmp(*id, "right/", 6) == 0)
765     {
766         *id += 6;
767     }
768 }
769 #endif
770 
771 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
772 
/* Converts an offset in hours to seconds. The argument is parenthesized so
   that expressions such as (a + b) are scaled as a whole. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)((offset)*3600)

/* One row of the abbreviation-to-Olson-ID table. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;    /* standard-time abbreviation matched by remapShortTimeZone() */
    const char *dstID;    /* daylight-time abbreviation matched by remapShortTimeZone() */
    const char *olsonID;  /* Olson ID returned on a match */
} OffsetZoneMapping;

/* Values for OffsetZoneMapping.daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
783 
784 /*
785 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
786 and maps it to an Olson ID.
787 Before adding anything to this list, take a look at
788 icu/source/tools/tzcode/tz.alias
789 Sometimes no daylight savings (0) is important to define due to aliases.
790 This list can be tested with icu/source/test/compat/tzone.pl
791 More values could be added to daylightType to increase precision.
792 */
793 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
794     {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
795     {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
796     {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
797     {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
798     {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
799     {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
800     {-36000, 2, "EST", "EST", "Australia/Sydney"},
801     {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
802     {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
803     {-34200, 2, "CST", "CST", "Australia/South"},
804     {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
805     {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
806     {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
807     {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
808     {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
809     {-28800, 2, "WST", "WST", "Australia/West"},
810     {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
811     {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
812     {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
813     {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
814     {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
815     {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
816     {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
817     {-14400, 1, "AZT", "AZST", "Asia/Baku"},
818     {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
819     {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
820     {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
821     {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
822     {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
823     {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
824     {-3600, 0, "CET", "WEST", "Africa/Algiers"},
825     {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
826     {0, 1, "GMT", "IST", "Europe/Dublin"},
827     {0, 1, "GMT", "BST", "Europe/London"},
828     {0, 0, "WET", "WEST", "Africa/Casablanca"},
829     {0, 0, "WET", "WET", "Africa/El_Aaiun"},
830     {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
831     {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
832     {10800, 1, "PMST", "PMDT", "America/Miquelon"},
833     {10800, 2, "UYT", "UYST", "America/Montevideo"},
834     {10800, 1, "WGT", "WGST", "America/Godthab"},
835     {10800, 2, "BRT", "BRST", "Brazil/East"},
836     {12600, 1, "NST", "NDT", "America/St_Johns"},
837     {14400, 1, "AST", "ADT", "Canada/Atlantic"},
838     {14400, 2, "AMT", "AMST", "America/Cuiaba"},
839     {14400, 2, "CLT", "CLST", "Chile/Continental"},
840     {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
841     {14400, 2, "PYT", "PYST", "America/Asuncion"},
842     {18000, 1, "CST", "CDT", "America/Havana"},
843     {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
844     {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
845     {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
846     {21600, 0, "CST", "CDT", "America/Guatemala"},
847     {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
848     {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
849     {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
850     {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
851     {32400, 1, "AKST", "AKDT", "US/Alaska"},
852     {36000, 1, "HAST", "HADT", "US/Aleutian"}
853 };
854 
855 /*#define DEBUG_TZNAME*/
856 
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)857 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
858 {
859     int32_t idx;
860 #ifdef DEBUG_TZNAME
861     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
862 #endif
863     for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
864     {
865         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
866             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
867             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
868             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
869         {
870             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
871         }
872     }
873     return NULL;
874 }
875 #endif
876 
877 #ifdef SEARCH_TZFILE
878 #define MAX_READ_SIZE 512
879 
/*
 * State describing the default TZ file. compareBinaryFiles() fills it in on
 * demand and reuses it on later calls, so the default file does not have to be
 * reopened and reread for every candidate file.
 */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;      /* contents of the default TZ file; NULL until it has been read */
    int64_t defaultTZFileSize;  /* size of the default TZ file as reported by ftell(); 0 until measured */
    FILE* defaultTZFilePtr;     /* open handle on the default TZ file; NULL until opened */
    UBool defaultTZstatus;      /* NOTE(review): set to FALSE by the caller; no reader is visible in this part of the file */
    int32_t defaultTZPosition;  /* offset into defaultTZBuffer of the next chunk to compare */
} DefaultTZInfo;
887 
888 /*
889  * This method compares the two files given to see if they are a match.
890  * It is currently use to compare two TZ files.
891  */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)892 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
893     FILE* file;
894     int64_t sizeFile;
895     int64_t sizeFileLeft;
896     int32_t sizeFileRead;
897     int32_t sizeFileToRead;
898     char bufferFile[MAX_READ_SIZE];
899     UBool result = TRUE;
900 
901     if (tzInfo->defaultTZFilePtr == NULL) {
902         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
903     }
904     file = fopen(TZFileName, "r");
905 
906     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
907 
908     if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
909         /* First check that the file size are equal. */
910         if (tzInfo->defaultTZFileSize == 0) {
911             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
912             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
913         }
914         fseek(file, 0, SEEK_END);
915         sizeFile = ftell(file);
916         sizeFileLeft = sizeFile;
917 
918         if (sizeFile != tzInfo->defaultTZFileSize) {
919             result = FALSE;
920         } else {
921             /* Store the data from the files in seperate buffers and
922              * compare each byte to determine equality.
923              */
924             if (tzInfo->defaultTZBuffer == NULL) {
925                 rewind(tzInfo->defaultTZFilePtr);
926                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
927                 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
928             }
929             rewind(file);
930             while(sizeFileLeft > 0) {
931                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
932                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
933 
934                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
935                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
936                     result = FALSE;
937                     break;
938                 }
939                 sizeFileLeft -= sizeFileRead;
940                 tzInfo->defaultTZPosition += sizeFileRead;
941             }
942         }
943     } else {
944         result = FALSE;
945     }
946 
947     if (file != NULL) {
948         fclose(file);
949     }
950 
951     return result;
952 }
953 
954 
955 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
956 #define SKIP1 "."
957 #define SKIP2 ".."
958 static UBool U_CALLCONV putil_cleanup(void);
959 static CharString *gSearchTZFileResult = NULL;
960 
961 /*
962  * This method recursively traverses the directory given for a matching TZ file and returns the first match.
963  * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
964  */
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)965 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
966     DIR* dirp = NULL;
967     struct dirent* dirEntry = NULL;
968     char* result = NULL;
969     UErrorCode status = U_ZERO_ERROR;
970 
971     /* Save the current path */
972     CharString curpath(path, -1, status);
973     if (U_FAILURE(status)) {
974         goto cleanupAndReturn;
975     }
976 
977     dirp = opendir(path);
978     if (dirp == NULL) {
979         goto cleanupAndReturn;
980     }
981 
982     if (gSearchTZFileResult == NULL) {
983         gSearchTZFileResult = new CharString;
984         if (gSearchTZFileResult == NULL) {
985             goto cleanupAndReturn;
986         }
987         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
988     }
989 
990     /* Check each entry in the directory. */
991     while((dirEntry = readdir(dirp)) != NULL) {
992         const char* dirName = dirEntry->d_name;
993         if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
994             && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
995             /* Create a newpath with the new entry to test each entry in the directory. */
996             CharString newpath(curpath, status);
997             newpath.append(dirName, -1, status);
998             if (U_FAILURE(status)) {
999                 break;
1000             }
1001 
1002             DIR* subDirp = NULL;
1003             if ((subDirp = opendir(newpath.data())) != NULL) {
1004                 /* If this new path is a directory, make a recursive call with the newpath. */
1005                 closedir(subDirp);
1006                 newpath.append('/', status);
1007                 if (U_FAILURE(status)) {
1008                     break;
1009                 }
1010                 result = searchForTZFile(newpath.data(), tzInfo);
1011                 /*
1012                  Have to get out here. Otherwise, we'd keep looking
1013                  and return the first match in the top-level directory
1014                  if there's a match in the top-level. If not, this function
1015                  would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1016                  It worked without this in most cases because we have a fallback of calling
1017                  localtime_r to figure out the default timezone.
1018                 */
1019                 if (result != NULL)
1020                     break;
1021             } else {
1022                 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1023                     int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1024                     if (amountToSkip > newpath.length()) {
1025                         amountToSkip = newpath.length();
1026                     }
1027                     const char* zoneid = newpath.data() + amountToSkip;
1028                     skipZoneIDPrefix(&zoneid);
1029                     gSearchTZFileResult->clear();
1030                     gSearchTZFileResult->append(zoneid, -1, status);
1031                     if (U_FAILURE(status)) {
1032                         break;
1033                     }
1034                     result = gSearchTZFileResult->data();
1035                     /* Get out after the first one found. */
1036                     break;
1037                 }
1038             }
1039         }
1040     }
1041 
1042   cleanupAndReturn:
1043     if (dirp) {
1044         closedir(dirp);
1045     }
1046     return result;
1047 }
1048 #endif
1049 
1050 #if U_PLATFORM == U_PF_ANDROID
1051 typedef int(system_property_read_callback)(const prop_info* info,
1052                                            void (*callback)(void* cookie,
1053                                                             const char* name,
1054                                                             const char* value,
1055                                                             uint32_t serial),
1056                                            void* cookie);
1057 typedef int(system_property_get)(const char*, char*);
1058 
1059 static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' };
1060 
u_property_read(void * cookie,const char * name,const char * value,uint32_t serial)1061 static void u_property_read(void* cookie, const char* name, const char* value,
1062                             uint32_t serial) {
1063     uprv_strcpy((char* )cookie, value);
1064 }
1065 #endif
1066 
1067 U_CAPI void U_EXPORT2
uprv_tzname_clear_cache(void)1068 uprv_tzname_clear_cache(void)
1069 {
1070 #if U_PLATFORM == U_PF_ANDROID
1071     /* Android's timezone is stored in system property. */
1072     gAndroidTimeZone[0] = '\0';
1073     void* libc = dlopen("libc.so", RTLD_NOLOAD);
1074     if (libc) {
1075         /* Android API 26+ has new API to get system property and old API
1076          * (__system_property_get) is deprecated */
1077         system_property_read_callback* property_read_callback =
1078             (system_property_read_callback*)dlsym(
1079                 libc, "__system_property_read_callback");
1080         if (property_read_callback) {
1081             const prop_info* info =
1082                 __system_property_find("persist.sys.timezone");
1083             if (info) {
1084                 property_read_callback(info, &u_property_read, gAndroidTimeZone);
1085             }
1086         } else {
1087             system_property_get* property_get =
1088                 (system_property_get*)dlsym(libc, "__system_property_get");
1089             if (property_get) {
1090                 property_get("persist.sys.timezone", gAndroidTimeZone);
1091             }
1092         }
1093         dlclose(libc);
1094     }
1095 #endif
1096 
1097 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1098     gTimeZoneBufferPtr = NULL;
1099 #endif
1100 }
1101 
/*
 * Returns an identifier for the host's default time zone.
 *
 * Depending on the build configuration, the following sources are tried in order:
 *   - Windows-only API builds: uprv_detectWindowsTimeZone(); if that yields
 *     nothing and U_TZNAME is not defined, a heap-allocated empty string.
 *   - Otherwise: the TZ environment variable (on Android, the cached system
 *     property), accepted only when isValidOlsonID() approves of it.
 *   - With CHECK_LOCALTIME_LINK: the target of the TZDEFAULT symlink and, when
 *     readlink() fails and SEARCH_TZFILE is defined, a search of TZZONEINFO for
 *     a file identical to TZDEFAULT. The outcome is cached in gTimeZoneBufferPtr.
 *   - With U_TZNAME: the abbreviations in U_TZNAME remapped to an Olson ID via
 *     remapShortTimeZone(), or else U_TZNAME[n] itself.
 *   - Otherwise: "".
 *
 * @param n Index into U_TZNAME; used only by the U_TZNAME fallback.
 * @return The zone identifier. On Windows-only API builds the strings produced
 *         by this function itself are heap allocated with uprv_strdup().
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    (void)n; // Avoid unreferenced parameter warning.
    const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();

    if (tzid != NULL) {
        return tzid;
    }

#ifndef U_TZNAME
    // The return value is free'd in timezone.cpp on Windows because
    // the other code path returns a pointer to a heap location.
    // If we don't have a name already, then tzname wouldn't be any
    // better, so just fall back.
    return uprv_strdup("");
#endif // !U_TZNAME

#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != NULL) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
#if U_PLATFORM == U_PF_ANDROID
    tzid = gAndroidTimeZone;
#else
    tzid = getenv("TZ");
#endif
    if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (tzid[0] == ':') {
            tzid++;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr == NULL) {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (0 < ret) {
            int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
            /* readlink() does not terminate the buffer; do it here. */
            gTimeZoneBuffer[ret] = 0;
            char *  tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);

            /* Everything after TZZONEINFOTAIL in the link target is the candidate ID. */
            if (tzZoneInfoTailPtr != NULL
                && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
            {
                return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
            }
        } else {
#if defined(SEARCH_TZFILE)
            /* readlink() failed, so TZDEFAULT is not a usable symlink:
             * search TZZONEINFO for a file with identical contents instead. */
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != NULL) {
                tzInfo->defaultTZBuffer = NULL;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = NULL;
                tzInfo->defaultTZstatus = FALSE;
                tzInfo->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Free previously allocated memory */
                if (tzInfo->defaultTZBuffer != NULL) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != NULL) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            /* NOTE(review): if the search result fails isValidOlsonID(), it still
             * stays in gTimeZoneBufferPtr and later calls return it from the
             * cached branch below - confirm this is intended. */
            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
    else {
        /* A previous call already resolved the zone; reuse it. */
        return gTimeZoneBufferPtr;
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        int daylightType;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs.  */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        /* DST in effect at the December probe takes precedence over the June probe. */
        if(decemberSol.tm_isdst > 0) {
          daylightType = U_DAYLIGHT_DECEMBER;
        } else if(juneSol.tm_isdst > 0) {
          daylightType = U_DAYLIGHT_JUNE;
        } else {
          daylightType = U_DAYLIGHT_NONE;
        }
        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != NULL) {
            return tzid;
        }
    }
    /* No table entry matched: fall back to the raw abbreviation. */
    return U_TZNAME[n];
#endif
#else
    return "";
#endif
}
1251 
1252 /* Get and set the ICU data directory --------------------------------------- */
1253 
1254 static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
1255 static char *gDataDirectory = NULL;
1256 
1257 UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1258 static CharString *gTimeZoneFilesDirectory = NULL;
1259 
1260 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1261  static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
1262  static bool gCorrectedPOSIXLocaleHeapAllocated = false;
1263 #endif
1264 
putil_cleanup(void)1265 static UBool U_CALLCONV putil_cleanup(void)
1266 {
1267     if (gDataDirectory && *gDataDirectory) {
1268         uprv_free(gDataDirectory);
1269     }
1270     gDataDirectory = NULL;
1271     gDataDirInitOnce.reset();
1272 
1273     delete gTimeZoneFilesDirectory;
1274     gTimeZoneFilesDirectory = NULL;
1275     gTimeZoneFilesInitOnce.reset();
1276 
1277 #ifdef SEARCH_TZFILE
1278     delete gSearchTZFileResult;
1279     gSearchTZFileResult = NULL;
1280 #endif
1281 
1282 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1283     if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
1284         uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
1285         gCorrectedPOSIXLocale = NULL;
1286         gCorrectedPOSIXLocaleHeapAllocated = false;
1287     }
1288 #endif
1289     return TRUE;
1290 }
1291 
1292 /*
1293  * Set the data directory.
1294  *    Make a copy of the passed string, and set the global data dir to point to it.
1295  */
1296 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1297 u_setDataDirectory(const char *directory) {
1298     char *newDataDir;
1299     int32_t length;
1300 
1301     if(directory==NULL || *directory==0) {
1302         /* A small optimization to prevent the malloc and copy when the
1303         shared library is used, and this is a way to make sure that NULL
1304         is never returned.
1305         */
1306         newDataDir = (char *)"";
1307     }
1308     else {
1309         length=(int32_t)uprv_strlen(directory);
1310         newDataDir = (char *)uprv_malloc(length + 2);
1311         /* Exit out if newDataDir could not be created. */
1312         if (newDataDir == NULL) {
1313             return;
1314         }
1315         uprv_strcpy(newDataDir, directory);
1316 
1317 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1318         {
1319             char *p;
1320             while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
1321                 *p = U_FILE_SEP_CHAR;
1322             }
1323         }
1324 #endif
1325     }
1326 
1327     if (gDataDirectory && *gDataDirectory) {
1328         uprv_free(gDataDirectory);
1329     }
1330     gDataDirectory = newDataDir;
1331     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1332 }
1333 
1334 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1335 uprv_pathIsAbsolute(const char *path)
1336 {
1337   if(!path || !*path) {
1338     return FALSE;
1339   }
1340 
1341   if(*path == U_FILE_SEP_CHAR) {
1342     return TRUE;
1343   }
1344 
1345 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1346   if(*path == U_FILE_ALT_SEP_CHAR) {
1347     return TRUE;
1348   }
1349 #endif
1350 
1351 #if U_PLATFORM_USES_ONLY_WIN32_API
1352   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1353        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1354       path[1] == ':' ) {
1355     return TRUE;
1356   }
1357 #endif
1358 
1359   return FALSE;
1360 }
1361 
1362 /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1363    (needed for some Darwin ICU build environments) */
1364 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
1365 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1366 #  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1367 # endif
1368 #endif
1369 
1370 #if defined(ICU_DATA_DIR_WINDOWS)
1371 // Helper function to get the ICU Data Directory under the Windows directory location.
getIcuDataDirectoryUnderWindowsDirectory(char * directoryBuffer,UINT bufferLength)1372 static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1373 {
1374     wchar_t windowsPath[MAX_PATH];
1375     char windowsPathUtf8[MAX_PATH];
1376 
1377     UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1378     if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1379         // Convert UTF-16 to a UTF-8 string.
1380         UErrorCode status = U_ZERO_ERROR;
1381         int32_t windowsPathUtf8Len = 0;
1382         u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1383             &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
1384 
1385         if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1386             (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1387             // Ensure it always has a separator, so we can append the ICU data path.
1388             if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1389                 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1390                 windowsPathUtf8[windowsPathUtf8Len] = '\0';
1391             }
1392             // Check if the concatenated string will fit.
1393             if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1394                 uprv_strcpy(directoryBuffer, windowsPathUtf8);
1395                 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1396                 return TRUE;
1397             }
1398         }
1399     }
1400 
1401     return FALSE;
1402 }
1403 #endif
1404 
/*
 * One-time initializer for the ICU data directory, run through
 * umtx_initOnce() by u_getDataDirectory().
 *
 * Picks a path from, in this order and depending on the build configuration:
 * the ICU_DATA environment variable, the compiled-in ICU_DATA_DIR or
 * U_ICU_DATA_DEFAULT_DIR (optionally prefixed with the value of the
 * environment variable named by ICU_DATA_DIR_PREFIX_ENV_VAR), and the ICU data
 * directory under the Windows directory. The result ("" if nothing was found)
 * is passed to u_setDataDirectory(), which makes its own copy.
 */
static void U_CALLCONV dataDirectoryInitFn() {
    /* If we already have the directory, then return immediately. Will happen if user called
     * u_setDataDirectory().
     */
    if (gDataDirectory) {
        return;
    }

    const char *path = NULL;
#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
    /* Scratch space for "<prefix><path>"; only needs to live until u_setDataDirectory() copies it. */
    char datadir_path_buffer[PATH_MAX];
#endif

    /*
    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
    override ICU's data with the ICU_DATA environment variable. This prevents
    problems where multiple custom copies of ICU's specific version of data
    are installed on a system. Either the application must define the data
    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
    ICU, set the data with udata_setCommonData or trust that all of the
    required data is contained in ICU's data library that contains
    the entry point defined by U_ICUDATA_ENTRY_POINT.

    There may also be some platforms where environment variables
    are not allowed.
    */
#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
    /* First try to get the environment variable */
#     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
        path=getenv("ICU_DATA");
#     endif
#   endif

    /* ICU_DATA_DIR may be set as a compile option.
     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
     * and is used only when data is built in archive mode eliminating the need
     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
     * set their own path.
     */
#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
    if(path==NULL || *path==0) {
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
# endif
# ifdef ICU_DATA_DIR
        path=ICU_DATA_DIR;
# else
        path=U_ICU_DATA_DEFAULT_DIR;
# endif
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        if (prefix != NULL) {
            /* NOTE(review): the snprintf() result is not checked, so an over-long
             * prefix + path is silently truncated to PATH_MAX - 1 characters. */
            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
            path=datadir_path_buffer;
        }
# endif
    }
#endif

#if defined(ICU_DATA_DIR_WINDOWS)
    /* When the helper succeeds, its result replaces whatever was chosen above. */
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        path = datadir_path_buffer;
    }
#endif

    if(path==NULL) {
        /* It looks really bad, set it to something. */
        path = "";
    }

    u_setDataDirectory(path);
    return;
}
1479 
/*
 * Returns the ICU data directory, running dataDirectoryInitFn() through
 * umtx_initOnce() beforehand.
 */
U_CAPI const char * U_EXPORT2
u_getDataDirectory(void) {
    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
    return gDataDirectory;
}
1485 
setTimeZoneFilesDir(const char * path,UErrorCode & status)1486 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1487     if (U_FAILURE(status)) {
1488         return;
1489     }
1490     gTimeZoneFilesDirectory->clear();
1491     gTimeZoneFilesDirectory->append(path, status);
1492 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1493     char *p = gTimeZoneFilesDirectory->data();
1494     while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
1495         *p = U_FILE_SEP_CHAR;
1496     }
1497 #endif
1498 }
1499 
1500 #define TO_STRING(x) TO_STRING_2(x)
1501 #define TO_STRING_2(x) #x
1502 
/*
 * One-time initializer for the time zone files directory, run through
 * umtx_initOnce().
 *
 * Allocates gTimeZoneFilesDirectory and fills it from, depending on the build
 * configuration: the ICU data directory under the Windows directory (UWP
 * builds), the ICU_TIMEZONE_FILES_DIR environment variable (other builds), or
 * the compiled-in U_TIMEZONE_FILES_DIR; "" is used when none applies. The value
 * of the environment variable named by ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR,
 * if defined and set, is prepended.
 *
 * @param status Set to U_MEMORY_ALLOCATION_ERROR if the allocation fails.
 */
static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
    U_ASSERT(gTimeZoneFilesDirectory == NULL);
    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    gTimeZoneFilesDirectory = new CharString();
    if (gTimeZoneFilesDirectory == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    const char *dir = "";

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    char timezonefilesdir_path_buffer[PATH_MAX];
    const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
#endif

#if U_PLATFORM_HAS_WINUWP_API == 1
// The UWP version does not support the environment variable setting.

# if defined(ICU_DATA_DIR_WINDOWS)
    // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        dir = datadir_path_buffer;
    }
# endif

#else
    dir = getenv("ICU_TIMEZONE_FILES_DIR");
#endif // U_PLATFORM_HAS_WINUWP_API

#if defined(U_TIMEZONE_FILES_DIR)
    // NOTE(review): on the UWP path dir is never NULL (it starts out as ""), so
    // this build-time setting is only reached on the getenv() path - confirm
    // that this is intended.
    if (dir == NULL) {
        // Build time configuration setting.
        dir = TO_STRING(U_TIMEZONE_FILES_DIR);
    }
#endif

    if (dir == NULL) {
        dir = "";
    }

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    if (prefix != NULL) {
        // NOTE(review): the snprintf() result is not checked, so an over-long
        // prefix + dir is silently truncated to PATH_MAX - 1 characters.
        snprintf(timezonefilesdir_path_buffer, PATH_MAX, "%s%s", prefix, dir);
        dir = timezonefilesdir_path_buffer;
    }
#endif

    setTimeZoneFilesDir(dir, status);
}
1554 
1555 
1556 U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode * status)1557 u_getTimeZoneFilesDirectory(UErrorCode *status) {
1558     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1559     return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1560 }
1561 
/*
 * Sets the time zone files directory to path.
 * The one-time initializer runs first so that gTimeZoneFilesDirectory exists;
 * setTimeZoneFilesDir() then replaces its contents, unless *status indicates
 * a failure.
 */
U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
    setTimeZoneFilesDir(path, *status);

    // Note: this function does some extra churn, first setting based on the
    //       environment, then immediately replacing with the value passed in.
    //       The logic is simpler that way, and performance shouldn't be an issue.
}
1571 
1572 
1573 #if U_POSIX_LOCALE
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * Lookup order: setlocale(category, NULL); if that yields nothing, "C" or
 * "POSIX", then the LC_ALL environment variable, then LC_MESSAGES or LC_CTYPE
 * (matching the category), then LANG. On Solaris an empty variable is treated
 * like an unset one. If the final value is still missing, "C" or "POSIX",
 * "en_US_POSIX" is returned; the same happens for any unsupported category.
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;
    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * On Solaris two different calls to setlocale can result in
        * different values. Only get this value once.
        *
        * We must check this first because an application can set this.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        posixID = setlocale(category, NULL);
        if ((posixID == 0)
            || (uprv_strcmp("C", posixID) == 0)
            || (uprv_strcmp("POSIX", posixID) == 0))
        {
            /* Maybe we got some garbage.  Try something more reasonable */
            posixID = getenv("LC_ALL");
            /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
             * This is needed to properly handle empty env. variables
             */
            /* The two preprocessor branches below open the same pair of nested
             * blocks; they are closed by the shared braces after #endif. */
#if U_PLATFORM == U_PF_SOLARIS
            if ((posixID == 0) || (posixID[0] == '\0')) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if ((posixID == 0) || (posixID[0] == '\0')) {
#else
            if (posixID == 0) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if (posixID == 0) {
#endif
                    posixID = getenv("LANG");
                }
            }
        }
    }
    if ((posixID==0)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
        // Note: this test will not catch 'C.UTF-8',
        // that will be handled in uprv_getDefaultLocaleID().
        // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
        // caller which expects to see "en_US_POSIX" in many branches.
    }
    return posixID;
}
1637 
1638 /* Return just the POSIX id for the default locale, whatever happens to be in
1639  * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1640  */
1641 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1642 {
1643     static const char* posixID = NULL;
1644     if (posixID == 0) {
1645         posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1646     }
1647     return posixID;
1648 }
1649 
1650 #if !U_CHARSET_IS_UTF8
1651 /* Return just the POSIX id for the default codepage, whatever happens to be in
1652  * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1653  */
1654 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1655 {
1656     static const char* posixID = NULL;
1657     if (posixID == 0) {
1658         posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1659     }
1660     return posixID;
1661 }
1662 #endif
1663 #endif
1664 
1665 /* NOTE: The caller should handle thread safety */
1666 U_CAPI const char* U_EXPORT2
1667 uprv_getDefaultLocaleID()
1668 {
1669 #if U_POSIX_LOCALE
1670 /*
1671   Note that:  (a '!' means the ID is improper somehow)
1672      LC_ALL  ---->     default_loc          codepage
1673 --------------------------------------------------------
1674      ab.CD             ab                   CD
1675      ab@CD             ab__CD               -
1676      ab@CD.EF          ab__CD               EF
1677 
1678      ab_CD.EF@GH       ab_CD_GH             EF
1679 
1680 Some 'improper' ways to do the same as above:
1681   !  ab_CD@GH.EF       ab_CD_GH             EF
1682   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1683   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1684 
1685      _CD@GH            _CD_GH               -
1686      _CD.EF@GH         _CD_GH               EF
1687 
1688 The variant cannot have dots in it.
1689 The 'rightmost' variant (@xxx) wins.
1690 The leftmost codepage (.xxx) wins.
1691 */
1692     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1693 
1694     /* Format: (no spaces)
1695     ll [ _CC ] [ . MM ] [ @ VV]
1696 
1697       l = lang, C = ctry, M = charmap, V = variant
1698     */
1699 
1700     if (gCorrectedPOSIXLocale != nullptr) {
1701         return gCorrectedPOSIXLocale;
1702     }
1703 
1704     // Copy the ID into owned memory.
1705     // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1706     char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1707     if (correctedPOSIXLocale == nullptr) {
1708         return nullptr;
1709     }
1710     uprv_strcpy(correctedPOSIXLocale, posixID);
1711 
1712     char *limit;
1713     if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1714         *limit = 0;
1715     }
1716     if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1717         *limit = 0;
1718     }
1719 
1720     if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1721         || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1722       // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1723       // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1724       uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1725     }
1726 
1727     /* Note that we scan the *uncorrected* ID. */
1728     const char *p;
1729     if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1730         p++;
1731 
1732         /* Take care of any special cases here.. */
1733         if (!uprv_strcmp(p, "nynorsk")) {
1734             p = "NY";
1735             /* Don't worry about no__NY. In practice, it won't appear. */
1736         }
1737 
1738         if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1739             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1740         }
1741         else {
1742             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1743         }
1744 
1745         const char *q;
1746         if ((q = uprv_strchr(p, '.')) != nullptr) {
1747             /* How big will the resulting string be? */
1748             int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1749             uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1750             correctedPOSIXLocale[len] = 0;
1751         }
1752         else {
1753             /* Anything following the @ sign */
1754             uprv_strcat(correctedPOSIXLocale, p);
1755         }
1756 
1757         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1758          * How about 'russian' -> 'ru'?
1759          * Many of the other locales using ISO codes will be handled by the
1760          * canonicalization functions in uloc_getDefault.
1761          */
1762     }
1763 
1764     if (gCorrectedPOSIXLocale == nullptr) {
1765         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1766         gCorrectedPOSIXLocaleHeapAllocated = true;
1767         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1768         correctedPOSIXLocale = nullptr;
1769     }
1770     posixID = gCorrectedPOSIXLocale;
1771 
1772     if (correctedPOSIXLocale != nullptr) {  /* Was already set - clean up. */
1773         uprv_free(correctedPOSIXLocale);
1774     }
1775 
1776     return posixID;
1777 
1778 #elif U_PLATFORM_USES_ONLY_WIN32_API
1779 #define POSIX_LOCALE_CAPACITY 64
1780     UErrorCode status = U_ZERO_ERROR;
1781     char *correctedPOSIXLocale = nullptr;
1782 
1783     // If we have already figured this out just use the cached value
1784     if (gCorrectedPOSIXLocale != nullptr) {
1785         return gCorrectedPOSIXLocale;
1786     }
1787 
1788     // No cached value, need to determine the current value
1789     static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1790     int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1791 
1792     // Now we should have a Windows locale name that needs converted to the POSIX style.
1793     if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1794     {
1795         // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1796         char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1797 
1798         int32_t i;
1799         for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1800         {
1801             if (windowsLocale[i] == '_')
1802             {
1803                 modifiedWindowsLocale[i] = '-';
1804             }
1805             else
1806             {
1807                 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1808             }
1809 
1810             if (modifiedWindowsLocale[i] == '\0')
1811             {
1812                 break;
1813             }
1814         }
1815 
1816         if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1817         {
1818             // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1819             // locale when tags are dropped
1820             modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1821         }
1822 
1823         // Now normalize the resulting name
1824         correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1825         /* TODO: Should we just exit on memory allocation failure? */
1826         if (correctedPOSIXLocale)
1827         {
1828             int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1829             if (U_SUCCESS(status))
1830             {
1831                 *(correctedPOSIXLocale + posixLen) = 0;
1832                 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1833                 gCorrectedPOSIXLocaleHeapAllocated = true;
1834                 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1835             }
1836             else
1837             {
1838                 uprv_free(correctedPOSIXLocale);
1839             }
1840         }
1841     }
1842 
1843     // If unable to find a locale we can agree upon, use en-US by default
1844     if (gCorrectedPOSIXLocale == nullptr) {
1845         gCorrectedPOSIXLocale = "en_US";
1846     }
1847     return gCorrectedPOSIXLocale;
1848 
1849 #elif U_PLATFORM == U_PF_OS400
1850     /* locales are process scoped and are by definition thread safe */
1851     static char correctedLocale[64];
1852     const  char *localeID = getenv("LC_ALL");
1853            char *p;
1854 
1855     if (localeID == NULL)
1856         localeID = getenv("LANG");
1857     if (localeID == NULL)
1858         localeID = setlocale(LC_ALL, NULL);
1859     /* Make sure we have something... */
1860     if (localeID == NULL)
1861         return "en_US_POSIX";
1862 
1863     /* Extract the locale name from the path. */
1864     if((p = uprv_strrchr(localeID, '/')) != NULL)
1865     {
1866         /* Increment p to start of locale name. */
1867         p++;
1868         localeID = p;
1869     }
1870 
1871     /* Copy to work location. */
1872     uprv_strcpy(correctedLocale, localeID);
1873 
1874     /* Strip off the '.locale' extension. */
1875     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1876         *p = 0;
1877     }
1878 
1879     /* Upper case the locale name. */
1880     T_CString_toUpperCase(correctedLocale);
1881 
1882     /* See if we are using the POSIX locale.  Any of the
1883     * following are equivalent and use the same QLGPGCMA
1884     * (POSIX) locale.
1885     * QLGPGCMA2 means UCS2
1886     * QLGPGCMA_4 means UTF-32
1887     * QLGPGCMA_8 means UTF-8
1888     */
1889     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1890         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1891         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1892     {
1893         uprv_strcpy(correctedLocale, "en_US_POSIX");
1894     }
1895     else
1896     {
1897         int16_t LocaleLen;
1898 
1899         /* Lower case the lang portion. */
1900         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1901         {
1902             *p = uprv_tolower(*p);
1903         }
1904 
1905         /* Adjust for Euro.  After '_E' add 'URO'. */
1906         LocaleLen = uprv_strlen(correctedLocale);
1907         if (correctedLocale[LocaleLen - 2] == '_' &&
1908             correctedLocale[LocaleLen - 1] == 'E')
1909         {
1910             uprv_strcat(correctedLocale, "URO");
1911         }
1912 
1913         /* If using Lotus-based locale then convert to
1914          * equivalent non Lotus.
1915          */
1916         else if (correctedLocale[LocaleLen - 2] == '_' &&
1917             correctedLocale[LocaleLen - 1] == 'L')
1918         {
1919             correctedLocale[LocaleLen - 2] = 0;
1920         }
1921 
1922         /* There are separate simplified and traditional
1923          * locales called zh_HK_S and zh_HK_T.
1924          */
1925         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1926         {
1927             uprv_strcpy(correctedLocale, "zh_HK");
1928         }
1929 
1930         /* A special zh_CN_GBK locale...
1931         */
1932         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1933         {
1934             uprv_strcpy(correctedLocale, "zh_CN");
1935         }
1936 
1937     }
1938 
1939     return correctedLocale;
1940 #endif
1941 
1942 }
1943 
1944 #if !U_CHARSET_IS_UTF8
1945 #if U_POSIX_LOCALE
1946 /*
1947 Due to various platform differences, one platform may specify a charset,
1948 when they really mean a different charset. Remap the names so that they are
1949 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1950 here. Before adding anything to this function, please consider adding unique
1951 names to the ICU alias table in the data directory.
1952 */
1953 static const char*
1954 remapPlatformDependentCodepage(const char *locale, const char *name) {
1955     if (locale != NULL && *locale == 0) {
1956         /* Make sure that an empty locale is handled the same way. */
1957         locale = NULL;
1958     }
1959     if (name == NULL) {
1960         return NULL;
1961     }
1962 #if U_PLATFORM == U_PF_AIX
1963     if (uprv_strcmp(name, "IBM-943") == 0) {
1964         /* Use the ASCII compatible ibm-943 */
1965         name = "Shift-JIS";
1966     }
1967     else if (uprv_strcmp(name, "IBM-1252") == 0) {
1968         /* Use the windows-1252 that contains the Euro */
1969         name = "IBM-5348";
1970     }
1971 #elif U_PLATFORM == U_PF_SOLARIS
1972     if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1973         /* Solaris underspecifies the "EUC" name. */
1974         if (uprv_strcmp(locale, "zh_CN") == 0) {
1975             name = "EUC-CN";
1976         }
1977         else if (uprv_strcmp(locale, "zh_TW") == 0) {
1978             name = "EUC-TW";
1979         }
1980         else if (uprv_strcmp(locale, "ko_KR") == 0) {
1981             name = "EUC-KR";
1982         }
1983     }
1984     else if (uprv_strcmp(name, "eucJP") == 0) {
1985         /*
1986         ibm-954 is the best match.
1987         ibm-33722 is the default for eucJP (similar to Windows).
1988         */
1989         name = "eucjis";
1990     }
1991     else if (uprv_strcmp(name, "646") == 0) {
1992         /*
1993          * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1994          * ISO-8859-1 instead of US-ASCII(646).
1995          */
1996         name = "ISO-8859-1";
1997     }
1998 #elif U_PLATFORM_IS_DARWIN_BASED
1999     if (locale == NULL && *name == 0) {
2000         /*
2001         No locale was specified, and an empty name was passed in.
2002         This usually indicates that nl_langinfo didn't return valid information.
2003         Mac OS X uses UTF-8 by default (especially the locale data and console).
2004         */
2005         name = "UTF-8";
2006     }
2007     else if (uprv_strcmp(name, "CP949") == 0) {
2008         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2009         name = "EUC-KR";
2010     }
2011     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2012         /*
2013          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2014          */
2015         name = "UTF-8";
2016     }
2017 #elif U_PLATFORM == U_PF_BSD
2018     if (uprv_strcmp(name, "CP949") == 0) {
2019         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2020         name = "EUC-KR";
2021     }
2022 #elif U_PLATFORM == U_PF_HPUX
2023     if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2024         /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2025         /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2026         name = "hkbig5";
2027     }
2028     else if (uprv_strcmp(name, "eucJP") == 0) {
2029         /*
2030         ibm-1350 is the best match, but unavailable.
2031         ibm-954 is mostly a superset of ibm-1350.
2032         ibm-33722 is the default for eucJP (similar to Windows).
2033         */
2034         name = "eucjis";
2035     }
2036 #elif U_PLATFORM == U_PF_LINUX
2037     if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
2038         /* Linux underspecifies the "EUC" name. */
2039         if (uprv_strcmp(locale, "korean") == 0) {
2040             name = "EUC-KR";
2041         }
2042         else if (uprv_strcmp(locale, "japanese") == 0) {
2043             /* See comment below about eucJP */
2044             name = "eucjis";
2045         }
2046     }
2047     else if (uprv_strcmp(name, "eucjp") == 0) {
2048         /*
2049         ibm-1350 is the best match, but unavailable.
2050         ibm-954 is mostly a superset of ibm-1350.
2051         ibm-33722 is the default for eucJP (similar to Windows).
2052         */
2053         name = "eucjis";
2054     }
2055     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2056             (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2057         /*
2058          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2059          */
2060         name = "UTF-8";
2061     }
2062     /*
2063      * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2064      * it by falling back to 'US-ASCII' when NULL is returned from this
2065      * function. So, we don't have to worry about it here.
2066      */
2067 #endif
2068     /* return NULL when "" is passed in */
2069     if (*name == 0) {
2070         name = NULL;
2071     }
2072     return name;
2073 }
2074 
2075 static const char*
2076 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2077 {
2078     char localeBuf[100];
2079     const char *name = NULL;
2080     char *variant = NULL;
2081 
2082     if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2083         size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2084         uprv_strncpy(localeBuf, localeName, localeCapacity);
2085         localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2086         name = uprv_strncpy(buffer, name+1, buffCapacity);
2087         buffer[buffCapacity-1] = 0; /* ensure NULL termination */
2088         if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
2089             *variant = 0;
2090         }
2091         name = remapPlatformDependentCodepage(localeBuf, name);
2092     }
2093     return name;
2094 }
2095 #endif
2096 
2097 static const char*
2098 int_getDefaultCodepage()
2099 {
2100 #if U_PLATFORM == U_PF_OS400
2101     uint32_t ccsid = 37; /* Default to ibm-37 */
2102     static char codepage[64];
2103     Qwc_JOBI0400_t jobinfo;
2104     Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2105 
2106     EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2107         "*                         ", "                ", &error);
2108 
2109     if (error.Bytes_Available == 0) {
2110         if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2111             ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2112         }
2113         else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2114             ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2115         }
2116         /* else use the default */
2117     }
2118     sprintf(codepage,"ibm-%d", ccsid);
2119     return codepage;
2120 
2121 #elif U_PLATFORM == U_PF_OS390
2122     static char codepage[64];
2123 
2124     strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2125     strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
2126     codepage[63] = 0; /* NULL terminate */
2127 
2128     return codepage;
2129 
2130 #elif U_PLATFORM_USES_ONLY_WIN32_API
2131     static char codepage[64];
2132     DWORD codepageNumber = 0;
2133 
2134 #if U_PLATFORM_HAS_WINUWP_API == 1
2135     // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2136     // have folks use Unicode than a "system" code page, however this is the same
2137     // codepage as the system default locale codepage.  (FWIW, the system locale is
2138     // ONLY used for codepage, it should never be used for anything else)
2139     GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2140         (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2141 #else
2142     // Win32 apps can call GetACP
2143     codepageNumber = GetACP();
2144 #endif
2145     // Special case for UTF-8
2146     if (codepageNumber == 65001)
2147     {
2148         return "UTF-8";
2149     }
2150     // Windows codepages can look like windows-1252, so format the found number
2151     // the numbers are eclectic, however all valid system code pages, besides UTF-8
2152     // are between 3 and 19999
2153     if (codepageNumber > 0 && codepageNumber < 20000)
2154     {
2155         sprintf(codepage, "windows-%ld", codepageNumber);
2156         return codepage;
2157     }
2158     // If the codepage number call failed then return UTF-8
2159     return "UTF-8";
2160 
2161 #elif U_POSIX_LOCALE
2162     static char codesetName[100];
2163     const char *localeName = NULL;
2164     const char *name = NULL;
2165 
2166     localeName = uprv_getPOSIXIDForDefaultCodepage();
2167     uprv_memset(codesetName, 0, sizeof(codesetName));
2168     /* On Solaris nl_langinfo returns C locale values unless setlocale
2169      * was called earlier.
2170      */
2171 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2172     /* When available, check nl_langinfo first because it usually gives more
2173        useful names. It depends on LC_CTYPE.
2174        nl_langinfo may use the same buffer as setlocale. */
2175     {
2176         const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
2177 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2178         /*
2179          * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2180          * instead of ASCII.
2181          */
2182         if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2183             codeset = remapPlatformDependentCodepage(localeName, codeset);
2184         } else
2185 #endif
2186         {
2187             codeset = remapPlatformDependentCodepage(NULL, codeset);
2188         }
2189 
2190         if (codeset != NULL) {
2191             uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2192             codesetName[sizeof(codesetName)-1] = 0;
2193             return codesetName;
2194         }
2195     }
2196 #endif
2197 
2198     /* Use setlocale in a nice way, and then check some environment variables.
2199        Maybe the application used setlocale already.
2200     */
2201     uprv_memset(codesetName, 0, sizeof(codesetName));
2202     name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2203     if (name) {
2204         /* if we can find the codeset name from setlocale, return that. */
2205         return name;
2206     }
2207 
2208     if (*codesetName == 0)
2209     {
2210         /* Everything failed. Return US ASCII (ISO 646). */
2211         (void)uprv_strcpy(codesetName, "US-ASCII");
2212     }
2213     return codesetName;
2214 #else
2215     return "US-ASCII";
2216 #endif
2217 }
2218 
2219 
2220 U_CAPI const char*  U_EXPORT2
2221 uprv_getDefaultCodepage()
2222 {
2223     static char const  *name = NULL;
2224     umtx_lock(NULL);
2225     if (name == NULL) {
2226         name = int_getDefaultCodepage();
2227     }
2228     umtx_unlock(NULL);
2229     return name;
2230 }
2231 #endif  /* !U_CHARSET_IS_UTF8 */
2232 
2233 
2234 /* end of platform-specific implementation -------------- */
2235 
2236 /* version handling --------------------------------------------------------- */
2237 
2238 U_CAPI void U_EXPORT2
2239 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2240     char *end;
2241     uint16_t part=0;
2242 
2243     if(versionArray==NULL) {
2244         return;
2245     }
2246 
2247     if(versionString!=NULL) {
2248         for(;;) {
2249             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2250             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2251                 break;
2252             }
2253             versionString=end+1;
2254         }
2255     }
2256 
2257     while(part<U_MAX_VERSION_LENGTH) {
2258         versionArray[part++]=0;
2259     }
2260 }
2261 
2262 U_CAPI void U_EXPORT2
2263 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2264     if(versionArray!=NULL && versionString!=NULL) {
2265         char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2266         int32_t len = u_strlen(versionString);
2267         if(len>U_MAX_VERSION_STRING_LENGTH) {
2268             len = U_MAX_VERSION_STRING_LENGTH;
2269         }
2270         u_UCharsToChars(versionString, versionChars, len);
2271         versionChars[len]=0;
2272         u_versionFromString(versionArray, versionChars);
2273     }
2274 }
2275 
2276 U_CAPI void U_EXPORT2
2277 u_versionToString(const UVersionInfo versionArray, char *versionString) {
2278     uint16_t count, part;
2279     uint8_t field;
2280 
2281     if(versionString==NULL) {
2282         return;
2283     }
2284 
2285     if(versionArray==NULL) {
2286         versionString[0]=0;
2287         return;
2288     }
2289 
2290     /* count how many fields need to be written */
2291     for(count=4; count>0 && versionArray[count-1]==0; --count) {
2292     }
2293 
2294     if(count <= 1) {
2295         count = 2;
2296     }
2297 
2298     /* write the first part */
2299     /* write the decimal field value */
2300     field=versionArray[0];
2301     if(field>=100) {
2302         *versionString++=(char)('0'+field/100);
2303         field%=100;
2304     }
2305     if(field>=10) {
2306         *versionString++=(char)('0'+field/10);
2307         field%=10;
2308     }
2309     *versionString++=(char)('0'+field);
2310 
2311     /* write the following parts */
2312     for(part=1; part<count; ++part) {
2313         /* write a dot first */
2314         *versionString++=U_VERSION_DELIMITER;
2315 
2316         /* write the decimal field value */
2317         field=versionArray[part];
2318         if(field>=100) {
2319             *versionString++=(char)('0'+field/100);
2320             field%=100;
2321         }
2322         if(field>=10) {
2323             *versionString++=(char)('0'+field/10);
2324             field%=10;
2325         }
2326         *versionString++=(char)('0'+field);
2327     }
2328 
2329     /* NUL-terminate */
2330     *versionString=0;
2331 }
2332 
2333 U_CAPI void U_EXPORT2
2334 u_getVersion(UVersionInfo versionArray) {
2335     (void)copyright;   // Suppress unused variable warning from clang.
2336     u_versionFromString(versionArray, U_ICU_VERSION);
2337 }
2338 
2339 /**
2340  * icucfg.h dependent code
2341  */
2342 
2343 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2344 
2345 #if HAVE_DLFCN_H
2346 #ifdef __MVS__
2347 #ifndef __SUSV3
2348 #define __SUSV3 1
2349 #endif
2350 #endif
2351 #include <dlfcn.h>
2352 #endif /* HAVE_DLFCN_H */
2353 
2354 U_CAPI void * U_EXPORT2
2355 uprv_dl_open(const char *libName, UErrorCode *status) {
2356   void *ret = NULL;
2357   if(U_FAILURE(*status)) return ret;
2358   ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2359   if(ret==NULL) {
2360 #ifdef U_TRACE_DYLOAD
2361     printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2362 #endif
2363     *status = U_MISSING_RESOURCE_ERROR;
2364   }
2365   return ret;
2366 }
2367 
2368 U_CAPI void U_EXPORT2
2369 uprv_dl_close(void *lib, UErrorCode *status) {
2370   if(U_FAILURE(*status)) return;
2371   dlclose(lib);
2372 }
2373 
2374 U_CAPI UVoidFunction* U_EXPORT2
2375 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2376   union {
2377       UVoidFunction *fp;
2378       void *vp;
2379   } uret;
2380   uret.fp = NULL;
2381   if(U_FAILURE(*status)) return uret.fp;
2382   uret.vp = dlsym(lib, sym);
2383   if(uret.vp == NULL) {
2384 #ifdef U_TRACE_DYLOAD
2385     printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2386 #endif
2387     *status = U_MISSING_RESOURCE_ERROR;
2388   }
2389   return uret.fp;
2390 }
2391 
2392 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2393 
2394 /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2396 
2397 U_CAPI void * U_EXPORT2
2398 uprv_dl_open(const char *libName, UErrorCode *status) {
2399   HMODULE lib = NULL;
2400 
2401   if(U_FAILURE(*status)) return NULL;
2402 
2403   lib = LoadLibraryA(libName);
2404 
2405   if(lib==NULL) {
2406     *status = U_MISSING_RESOURCE_ERROR;
2407   }
2408 
2409   return (void*)lib;
2410 }
2411 
2412 U_CAPI void U_EXPORT2
2413 uprv_dl_close(void *lib, UErrorCode *status) {
2414   HMODULE handle = (HMODULE)lib;
2415   if(U_FAILURE(*status)) return;
2416 
2417   FreeLibrary(handle);
2418 
2419   return;
2420 }
2421 
2422 U_CAPI UVoidFunction* U_EXPORT2
2423 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2424   HMODULE handle = (HMODULE)lib;
2425   UVoidFunction* addr = NULL;
2426 
2427   if(U_FAILURE(*status) || lib==NULL) return NULL;
2428 
2429   addr = (UVoidFunction*)GetProcAddress(handle, sym);
2430 
2431   if(addr==NULL) {
2432     DWORD lastError = GetLastError();
2433     if(lastError == ERROR_PROC_NOT_FOUND) {
2434       *status = U_MISSING_RESOURCE_ERROR;
2435     } else {
2436       *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2437     }
2438   }
2439 
2440   return addr;
2441 }
2442 
2443 #else
2444 
2445 /* No dynamic loading, null (nonexistent) implementation. */
2446 
2447 U_CAPI void * U_EXPORT2
2448 uprv_dl_open(const char *libName, UErrorCode *status) {
2449     (void)libName;
2450     if(U_FAILURE(*status)) return NULL;
2451     *status = U_UNSUPPORTED_ERROR;
2452     return NULL;
2453 }
2454 
2455 U_CAPI void U_EXPORT2
2456 uprv_dl_close(void *lib, UErrorCode *status) {
2457     (void)lib;
2458     if(U_FAILURE(*status)) return;
2459     *status = U_UNSUPPORTED_ERROR;
2460     return;
2461 }
2462 
2463 U_CAPI UVoidFunction* U_EXPORT2
2464 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2465   (void)lib;
2466   (void)sym;
2467   if(U_SUCCESS(*status)) {
2468     *status = U_UNSUPPORTED_ERROR;
2469   }
2470   return (UVoidFunction*)NULL;
2471 }
2472 
2473 #endif
2474 
2475 /*
2476  * Hey, Emacs, please set the following:
2477  *
2478  * Local Variables:
2479  * indent-tabs-mode: nil
2480  * End:
2481  *
2482  */
2483