1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1997-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12 *
13 * Date Name Description
14 * 04/14/97 aliu Creation.
15 * 04/24/97 aliu Added getDefaultDataDirectory() and
16 * getDefaultLocaleID().
17 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
18 * for assumed case. Non-UNIX platforms must be
19 * special-cased. Rewrote numeric methods dealing
20 * with NaN and Infinity to be platform independent
21 * over all IEEE 754 platforms.
22 * 05/13/97 aliu Restored sign of timezone
23 * (semantics are hours West of GMT)
24 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25 * nextDouble..
26 * 07/22/98 stephen Added remainder, max, min, trunc
27 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28 * 08/24/98 stephen Added longBitsFromDouble
29 * 09/08/98 stephen Minor changes for Mac Port
30 * 03/02/99 stephen Removed openFile(). Added AS400 support.
31 * Fixed EBCDIC tables
32 * 04/15/99 stephen Converted to C.
33 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34 * 08/04/99 jeffrey R. Added OS/2 changes
35 * 11/15/99 helena Integrated S/390 IEEE support.
36 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
38 * 01/03/08 Steven L. Fake Time Support
39 ******************************************************************************
40 */
41
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
45
46 // First, the platform type. Need this for U_PLATFORM.
47 #include "unicode/platform.h"
48
49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50 /* tzset isn't defined in strict ANSI on MinGW. */
51 #undef __STRICT_ANSI__
52 #endif
53
/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
 */
57 #include <time.h>
58
59 #if !U_PLATFORM_USES_ONLY_WIN32_API
60 #include <sys/time.h>
61 #endif
62
63 /* include the rest of the ICU headers */
64 #include "unicode/putil.h"
65 #include "unicode/ustring.h"
66 #include "putilimp.h"
67 #include "uassert.h"
68 #include "umutex.h"
69 #include "cmemory.h"
70 #include "cstring.h"
71 #include "locmap.h"
72 #include "ucln_cmn.h"
73 #include "charstr.h"
74
75 /* Include standard headers. */
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <math.h>
80 #include <locale.h>
81 #include <float.h>
82
83 #ifndef U_COMMON_IMPLEMENTATION
84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
85 #endif
86
87
88 /* include system headers */
89 #if U_PLATFORM_USES_ONLY_WIN32_API
90 /*
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
94 */
95 #ifndef WIN32_LEAN_AND_MEAN
96 # define WIN32_LEAN_AND_MEAN
97 #endif
98 # define VC_EXTRALEAN
99 # define NOUSER
100 # define NOSERVICE
101 # define NOIME
102 # define NOMCX
103 # include <windows.h>
104 # include "unicode/uloc.h"
105 # include "wintz.h"
106 #elif U_PLATFORM == U_PF_OS400
107 # include <float.h>
108 # include <qusec.h> /* error code structure */
109 # include <qusrjobi.h>
110 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
111 # include <mih/testptr.h> /* For uprv_maximumPtr */
112 #elif U_PLATFORM == U_PF_OS390
113 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
114 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
115 # include <limits.h>
116 # include <unistd.h>
117 # if U_PLATFORM == U_PF_SOLARIS
118 # ifndef _XPG4_2
119 # define _XPG4_2
120 # endif
121 # elif U_PLATFORM == U_PF_ANDROID
122 # include <sys/system_properties.h>
123 # include <dlfcn.h>
124 # endif
125 #elif U_PLATFORM == U_PF_QNX
126 # include <sys/neutrino.h>
127 #endif
128
129
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
135
136 #if U_HAVE_NL_LANGINFO_CODESET
137 #include <langinfo.h>
138 #endif
139
140 /**
141 * Simple things (presence of functions, etc) should just go in configure.in and be added to
142 * icucfg.h via autoheader.
143 */
144 #if U_PLATFORM_IMPLEMENTS_POSIX
145 # if U_PLATFORM == U_PF_OS400
146 # define HAVE_DLFCN_H 0
147 # define HAVE_DLOPEN 0
148 # else
149 # ifndef HAVE_DLFCN_H
150 # define HAVE_DLFCN_H 1
151 # endif
152 # ifndef HAVE_DLOPEN
153 # define HAVE_DLOPEN 1
154 # endif
155 # endif
156 # ifndef HAVE_GETTIMEOFDAY
157 # define HAVE_GETTIMEOFDAY 1
158 # endif
159 #else
160 # define HAVE_DLFCN_H 0
161 # define HAVE_DLOPEN 0
162 # define HAVE_GETTIMEOFDAY 0
163 #endif
164
165 U_NAMESPACE_USE
166
167 /* Define the extension for data files, again... */
168 #define DATA_TYPE "dat"
169
170 /* Leave this copyright notice here! */
171 static const char copyright[] = U_COPYRIGHT_STRING;
172
173 /* floating point implementations ------------------------------------------- */
174
175 /* We return QNAN rather than SNAN*/
176 #define SIGN 0x80000000U
177
178 /* Make it easy to define certain types of constants */
/*
 * Overlays a 64-bit integer on a double so that floating point constants can
 * be written down as raw bit patterns.
 */
typedef union {
    /* Keep this member first: the brace initializers below fill the first member. */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern of a quiet NaN. */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity. */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
185
186 /*---------------------------------------------------------------------------
187 Platform utilities
188 Our general strategy is to assume we're on a POSIX platform. Platforms which
189 are non-POSIX must declare themselves so. The default POSIX implementation
190 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
191 functions).
192 ---------------------------------------------------------------------------*/
193
194 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
195 # undef U_POSIX_LOCALE
196 #else
197 # define U_POSIX_LOCALE 1
198 #endif
199
200 /*
201 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
202 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
203 */
204 #if !IEEE_754
/*
 * Returns a pointer to the n bytes at the most significant end of *d:
 * the start of the object on big-endian builds, the last n bytes otherwise.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char* firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
214
/*
 * Returns a pointer to the n bytes at the least significant end of *d:
 * the last n bytes on big-endian builds, the start of the object otherwise.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char* firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
224 #endif /* !IEEE_754 */
225
226 #if IEEE_754
227 static UBool
u_signBit(double d)228 u_signBit(double d) {
229 uint8_t hiByte;
230 #if U_IS_BIG_ENDIAN
231 hiByte = *(uint8_t *)&d;
232 #else
233 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
234 #endif
235 return (hiByte & 0x80) != 0;
236 }
237 #endif
238
239
240
241 #if defined (U_DEBUG_FAKETIME)
242 /* Override the clock to test things without having to move the system clock.
243 * Assumes POSIX gettimeofday() will function
244 */
245 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
246 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
247 UBool fakeClock_set = false; /** True if fake clock has spun up **/
248
getUTCtime_real()249 static UDate getUTCtime_real() {
250 struct timeval posixTime;
251 gettimeofday(&posixTime, nullptr);
252 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
253 }
254
getUTCtime_fake()255 static UDate getUTCtime_fake() {
256 static UMutex fakeClockMutex;
257 umtx_lock(&fakeClockMutex);
258 if(!fakeClock_set) {
259 UDate real = getUTCtime_real();
260 const char *fake_start = getenv("U_FAKETIME_START");
261 if((fake_start!=nullptr) && (fake_start[0]!=0)) {
262 sscanf(fake_start,"%lf",&fakeClock_t0);
263 fakeClock_dt = fakeClock_t0 - real;
264 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
265 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
266 fakeClock_t0, fake_start, fakeClock_dt, real);
267 } else {
268 fakeClock_dt = 0;
269 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
270 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
271 }
272 fakeClock_set = true;
273 }
274 umtx_unlock(&fakeClockMutex);
275
276 return getUTCtime_real() + fakeClock_dt;
277 }
278 #endif
279
280 #if U_PLATFORM_USES_ONLY_WIN32_API
281 typedef union {
282 int64_t int64;
283 FILETIME fileTime;
284 } FileTimeConversion; /* This is like a ULARGE_INTEGER */
285
286 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
287 #define EPOCH_BIAS INT64_C(116444736000000000)
288 #define HECTONANOSECOND_PER_MILLISECOND 10000
289
290 #endif
291
292 /*---------------------------------------------------------------------------
293 Universal Implementations
294 These are designed to work on all platforms. Try these, and if they
295 don't work on your platform, then special case your platform with new
296 implementations.
297 ---------------------------------------------------------------------------*/
298
299 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()300 uprv_getUTCtime()
301 {
302 #if defined(U_DEBUG_FAKETIME)
303 return getUTCtime_fake(); /* Hook for overriding the clock */
304 #else
305 return uprv_getRawUTCtime();
306 #endif
307 }
308
309 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
310 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()311 uprv_getRawUTCtime()
312 {
313 #if U_PLATFORM_USES_ONLY_WIN32_API
314
315 FileTimeConversion winTime;
316 GetSystemTimeAsFileTime(&winTime.fileTime);
317 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
318 #else
319
320 #if HAVE_GETTIMEOFDAY
321 struct timeval posixTime;
322 gettimeofday(&posixTime, nullptr);
323 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
324 #else
325 time_t epochtime;
326 time(&epochtime);
327 return (UDate)epochtime * U_MILLIS_PER_SECOND;
328 #endif
329
330 #endif
331 }
332
333 /*-----------------------------------------------------------------------------
334 IEEE 754
335 These methods detect and return NaN and infinity values for doubles
336 conforming to IEEE 754. Platforms which support this standard include X86,
337 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
338 If this doesn't work on your platform, you have non-IEEE floating-point, and
339 will need to code your own versions. A naive implementation is to return 0.0
340 for getNaN and getInfinity, and false for isNaN and isInfinite.
341 ---------------------------------------------------------------------------*/
342
343 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)344 uprv_isNaN(double number)
345 {
346 #if IEEE_754
347 BitPatternConversion convertedNumber;
348 convertedNumber.d64 = number;
349 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
350 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
351
352 #elif U_PLATFORM == U_PF_OS390
353 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
354 sizeof(uint32_t));
355 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
356 sizeof(uint32_t));
357
358 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
359 (lowBits == 0x00000000L);
360
361 #else
362 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
363 /* you'll need to replace this default implementation with what's correct*/
364 /* for your platform.*/
365 return number != number;
366 #endif
367 }
368
369 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)370 uprv_isInfinite(double number)
371 {
372 #if IEEE_754
373 BitPatternConversion convertedNumber;
374 convertedNumber.d64 = number;
375 /* Infinity is exactly 0x7FF0000000000000U. */
376 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
377 #elif U_PLATFORM == U_PF_OS390
378 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
379 sizeof(uint32_t));
380 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
381 sizeof(uint32_t));
382
383 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
384
385 #else
386 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
387 /* value, you'll need to replace this default implementation with what's*/
388 /* correct for your platform.*/
389 return number == (2.0 * number);
390 #endif
391 }
392
393 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)394 uprv_isPositiveInfinity(double number)
395 {
396 #if IEEE_754 || U_PLATFORM == U_PF_OS390
397 return (UBool)(number > 0 && uprv_isInfinite(number));
398 #else
399 return uprv_isInfinite(number);
400 #endif
401 }
402
403 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)404 uprv_isNegativeInfinity(double number)
405 {
406 #if IEEE_754 || U_PLATFORM == U_PF_OS390
407 return (UBool)(number < 0 && uprv_isInfinite(number));
408
409 #else
410 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
411 sizeof(uint32_t));
412 return((highBits & SIGN) && uprv_isInfinite(number));
413
414 #endif
415 }
416
417 U_CAPI double U_EXPORT2
uprv_getNaN()418 uprv_getNaN()
419 {
420 #if IEEE_754 || U_PLATFORM == U_PF_OS390
421 return gNan.d64;
422 #else
423 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
424 /* you'll need to replace this default implementation with what's correct*/
425 /* for your platform.*/
426 return 0.0;
427 #endif
428 }
429
430 U_CAPI double U_EXPORT2
uprv_getInfinity()431 uprv_getInfinity()
432 {
433 #if IEEE_754 || U_PLATFORM == U_PF_OS390
434 return gInf.d64;
435 #else
436 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
437 /* value, you'll need to replace this default implementation with what's*/
438 /* correct for your platform.*/
439 return 0.0;
440 #endif
441 }
442
443 U_CAPI double U_EXPORT2
uprv_floor(double x)444 uprv_floor(double x)
445 {
446 return floor(x);
447 }
448
449 U_CAPI double U_EXPORT2
uprv_ceil(double x)450 uprv_ceil(double x)
451 {
452 return ceil(x);
453 }
454
455 U_CAPI double U_EXPORT2
uprv_round(double x)456 uprv_round(double x)
457 {
458 return uprv_floor(x + 0.5);
459 }
460
461 U_CAPI double U_EXPORT2
uprv_fabs(double x)462 uprv_fabs(double x)
463 {
464 return fabs(x);
465 }
466
467 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)468 uprv_modf(double x, double* y)
469 {
470 return modf(x, y);
471 }
472
473 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)474 uprv_fmod(double x, double y)
475 {
476 return fmod(x, y);
477 }
478
479 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)480 uprv_pow(double x, double y)
481 {
482 /* This is declared as "double pow(double x, double y)" */
483 return pow(x, y);
484 }
485
486 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)487 uprv_pow10(int32_t x)
488 {
489 return pow(10.0, (double)x);
490 }
491
492 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)493 uprv_fmax(double x, double y)
494 {
495 #if IEEE_754
496 /* first handle NaN*/
497 if(uprv_isNaN(x) || uprv_isNaN(y))
498 return uprv_getNaN();
499
500 /* check for -0 and 0*/
501 if(x == 0.0 && y == 0.0 && u_signBit(x))
502 return y;
503
504 #endif
505
506 /* this should work for all flt point w/o NaN and Inf special cases */
507 return (x > y ? x : y);
508 }
509
510 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)511 uprv_fmin(double x, double y)
512 {
513 #if IEEE_754
514 /* first handle NaN*/
515 if(uprv_isNaN(x) || uprv_isNaN(y))
516 return uprv_getNaN();
517
518 /* check for -0 and 0*/
519 if(x == 0.0 && y == 0.0 && u_signBit(y))
520 return y;
521
522 #endif
523
524 /* this should work for all flt point w/o NaN and Inf special cases */
525 return (x > y ? y : x);
526 }
527
528 U_CAPI UBool U_EXPORT2
uprv_add32_overflow(int32_t a,int32_t b,int32_t * res)529 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
530 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
531 // This function could be optimized by calling one of those primitives.
532 auto a64 = static_cast<int64_t>(a);
533 auto b64 = static_cast<int64_t>(b);
534 int64_t res64 = a64 + b64;
535 *res = static_cast<int32_t>(res64);
536 return res64 != *res;
537 }
538
539 U_CAPI UBool U_EXPORT2
uprv_mul32_overflow(int32_t a,int32_t b,int32_t * res)540 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
541 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
542 // This function could be optimized by calling one of those primitives.
543 auto a64 = static_cast<int64_t>(a);
544 auto b64 = static_cast<int64_t>(b);
545 int64_t res64 = a64 * b64;
546 *res = static_cast<int32_t>(res64);
547 return res64 != *res;
548 }
549
550 /**
551 * Truncates the given double.
552 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
553 * This is different than calling floor() or ceil():
554 * floor(3.3) = 3, floor(-3.3) = -4
555 * ceil(3.3) = 4, ceil(-3.3) = -3
556 */
557 U_CAPI double U_EXPORT2
uprv_trunc(double d)558 uprv_trunc(double d)
559 {
560 #if IEEE_754
561 /* handle error cases*/
562 if(uprv_isNaN(d))
563 return uprv_getNaN();
564 if(uprv_isInfinite(d))
565 return uprv_getInfinity();
566
567 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
568 return ceil(d);
569 else
570 return floor(d);
571
572 #else
573 return d >= 0 ? floor(d) : ceil(d);
574
575 #endif
576 }
577
578 /**
579 * Return the largest positive number that can be represented by an integer
580 * type of arbitrary bit length.
581 */
582 U_CAPI double U_EXPORT2
uprv_maxMantissa()583 uprv_maxMantissa()
584 {
585 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
586 }
587
588 U_CAPI double U_EXPORT2
uprv_log(double d)589 uprv_log(double d)
590 {
591 return log(d);
592 }
593
594 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)595 uprv_maximumPtr(void * base)
596 {
597 #if U_PLATFORM == U_PF_OS400
598 /*
599 * With the provided function we should never be out of range of a given segment
600 * (a traditional/typical segment that is). Our segments have 5 bytes for the
601 * id and 3 bytes for the offset. The key is that the casting takes care of
602 * only retrieving the offset portion minus x1000. Hence, the smallest offset
603 * seen in a program is x001000 and when casted to an int would be 0.
604 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
605 *
606 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
607 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
608 * This function determines the activation based on the pointer that is passed in and
609 * calculates the appropriate maximum available size for
610 * each pointer type (TERASPACE and non-TERASPACE)
611 *
612 * Unlike other operating systems, the pointer model isn't determined at
613 * compile time on i5/OS.
614 */
615 if ((base != nullptr) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
616 /* if it is a TERASPACE pointer the max is 2GB - 4k */
617 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
618 }
619 /* otherwise 16MB since nullptr ptr is not checkable or the ptr is not TERASPACE */
620 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
621
622 #else
623 return U_MAX_PTR(base);
624 #endif
625 }
626
627 /*---------------------------------------------------------------------------
628 Platform-specific Implementations
629 Try these, and if they don't work on your platform, then special case your
630 platform with new implementations.
631 ---------------------------------------------------------------------------*/
632
633 /* Generic time zone layer -------------------------------------------------- */
634
635 /* Time zone utilities */
636 U_CAPI void U_EXPORT2
uprv_tzset()637 uprv_tzset()
638 {
639 #if defined(U_TZSET)
640 U_TZSET();
641 #else
642 /* no initialization*/
643 #endif
644 }
645
646 U_CAPI int32_t U_EXPORT2
uprv_timezone()647 uprv_timezone()
648 {
649 #ifdef U_TIMEZONE
650 return U_TIMEZONE;
651 #else
652 time_t t, t1, t2;
653 struct tm tmrec;
654 int32_t tdiff = 0;
655
656 time(&t);
657 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
658 #if U_PLATFORM != U_PF_IPHONE
659 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
660 #endif
661 t1 = mktime(&tmrec); /* local time in seconds*/
662 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
663 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
664 tdiff = t2 - t1;
665
666 #if U_PLATFORM != U_PF_IPHONE
667 /* imitate NT behaviour, which returns same timezone offset to GMT for
668 winter and summer.
669 This does not work on all platforms. For instance, on glibc on Linux
670 and on Mac OS 10.5, tdiff calculated above remains the same
671 regardless of whether DST is in effect or not. iOS is another
672 platform where this does not work. Linux + glibc and Mac OS 10.5
673 have U_TIMEZONE defined so that this code is not reached.
674 */
675 if (dst_checked)
676 tdiff += 3600;
677 #endif
678 return tdiff;
679 #endif
680 }
681
682 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
683 some platforms need to have it declared here. */
684
685 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
686 /* RS6000 and others reject char **tzname. */
687 extern U_IMPORT char *U_TZNAME[];
688 #endif
689
690 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
691 /* These platforms are likely to use Olson timezone IDs. */
692 /* common targets of the symbolic link at TZDEFAULT are:
693 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
694 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
695 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
696 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
697 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
698 * To avoid checking lots of paths, just check that the target path
699 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
700 */
701
702 #define CHECK_LOCALTIME_LINK 1
703 #if U_PLATFORM_IS_DARWIN_BASED
704 #include <tzfile.h>
705 #define TZZONEINFO (TZDIR "/")
706 #elif U_PLATFORM == U_PF_SOLARIS
707 #define TZDEFAULT "/etc/localtime"
708 #define TZZONEINFO "/usr/share/lib/zoneinfo/"
709 #define TZ_ENV_CHECK "localtime"
710 #else
711 #define TZDEFAULT "/etc/localtime"
712 #define TZZONEINFO "/usr/share/zoneinfo/"
713 #endif
714 #define TZZONEINFOTAIL "/zoneinfo/"
715 #if U_HAVE_DIRENT_H
716 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
717 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
718 symlinked to /etc/localtime, which makes searchForTZFile return
719 'localtime' when it's the first match. */
720 #define TZFILE_SKIP2 "localtime"
721 #define SEARCH_TZFILE
722 #include <dirent.h> /* Needed to search through system timezone files */
723 #endif
/* Path-sized scratch buffer for time zone detection; its users are outside this part of the file. */
static char gTimeZoneBuffer[PATH_MAX];
/* NOTE(review): presumably points into gTimeZoneBuffer once a zone has been found -- confirm against its users. */
static const char *gTimeZoneBufferPtr = nullptr;
726 #endif
727
728 #if !U_PLATFORM_USES_ONLY_WIN32_API
729 #define isNonDigit(ch) (ch < '0' || '9' < ch)
730 #define isDigit(ch) ('0' <= ch && ch <= '9')
isValidOlsonID(const char * id)731 static UBool isValidOlsonID(const char *id) {
732 int32_t idx = 0;
733 int32_t idxMax = 0;
734
735 /* Determine if this is something like Iceland (Olson ID)
736 or AST4ADT (non-Olson ID) */
737 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
738 idx++;
739 }
740
741 /* Allow at maximum 2 numbers at the end of the id to support zone id's
742 like GMT+11. */
743 idxMax = idx + 2;
744 while (id[idx] && isDigit(id[idx]) && idx < idxMax) {
745 idx++;
746 }
747
748 /* If we went through the whole string, then it might be okay.
749 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
750 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
751 The rest of the time it could be an Olson ID. George */
752 return (UBool)(id[idx] == 0
753 || uprv_strcmp(id, "PST8PDT") == 0
754 || uprv_strcmp(id, "MST7MDT") == 0
755 || uprv_strcmp(id, "CST6CDT") == 0
756 || uprv_strcmp(id, "EST5EDT") == 0);
757 }
758
759 /* On some Unix-like OS, 'posix' subdirectory in
760 /usr/share/zoneinfo replicates the top-level contents. 'right'
761 subdirectory has the same set of files, but individual files
762 are different from those in the top-level directory or 'posix'
763 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
764 has files for UTC.
765 When the first match for /etc/localtime is in either of them
766 (usually in posix because 'right' has different file contents),
767 or TZ environment variable points to one of them, createTimeZone
768 fails because, say, 'posix/America/New_York' is not an Olson
769 timezone id ('America/New_York' is). So, we have to skip
770 'posix/' and 'right/' at the beginning. */
skipZoneIDPrefix(const char ** id)771 static void skipZoneIDPrefix(const char** id) {
772 if (uprv_strncmp(*id, "posix/", 6) == 0
773 || uprv_strncmp(*id, "right/", 6) == 0)
774 {
775 *id += 6;
776 }
777 }
778 #endif
779
780 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
781
782 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the table that maps an abbreviated zone description to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* offset that remapShortTimeZone compares against */
    int32_t daylightType;   /* one of the U_DAYLIGHT_* values below */
    const char *stdID;      /* standard-time abbreviation */
    const char *dstID;      /* daylight-time abbreviation */
    const char *olsonID;    /* Olson ID returned for a matching row */
} OffsetZoneMapping;

/* Values for OffsetZoneMapping::daylightType. */
enum {
    U_DAYLIGHT_NONE=0,      /* no daylight saving time */
    U_DAYLIGHT_JUNE=1,      /* daylight saving time in June */
    U_DAYLIGHT_DECEMBER=2   /* daylight saving time in December */
};
792
793 /*
794 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
795 and maps it to an Olson ID.
796 Before adding anything to this list, take a look at
797 icu/source/tools/tzcode/tz.alias
798 Sometimes no daylight savings (0) is important to define due to aliases.
799 This list can be tested with icu/source/test/compat/tzone.pl
800 More values could be added to daylightType to increase precision.
801 */
802 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
803 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
804 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
805 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
806 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
807 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
808 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
809 {-36000, 2, "EST", "EST", "Australia/Sydney"},
810 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
811 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
812 {-34200, 2, "CST", "CST", "Australia/South"},
813 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
814 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
815 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
816 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
817 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
818 {-28800, 2, "WST", "WST", "Australia/West"},
819 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
820 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
821 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
822 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
823 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
824 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
825 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
826 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
827 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
828 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
829 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
830 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
831 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
832 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
833 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
834 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
835 {0, 1, "GMT", "IST", "Europe/Dublin"},
836 {0, 1, "GMT", "BST", "Europe/London"},
837 {0, 0, "WET", "WEST", "Africa/Casablanca"},
838 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
839 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
840 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
841 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
842 {10800, 2, "UYT", "UYST", "America/Montevideo"},
843 {10800, 1, "WGT", "WGST", "America/Godthab"},
844 {10800, 2, "BRT", "BRST", "Brazil/East"},
845 {12600, 1, "NST", "NDT", "America/St_Johns"},
846 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
847 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
848 {14400, 2, "CLT", "CLST", "Chile/Continental"},
849 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
850 {14400, 2, "PYT", "PYST", "America/Asuncion"},
851 {18000, 1, "CST", "CDT", "America/Havana"},
852 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
853 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
854 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
855 {21600, 0, "CST", "CDT", "America/Guatemala"},
856 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
857 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
858 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
859 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
860 {32400, 1, "AKST", "AKDT", "US/Alaska"},
861 {36000, 1, "HAST", "HADT", "US/Aleutian"}
862 };
863
864 /*#define DEBUG_TZNAME*/
865
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)866 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
867 {
868 int32_t idx;
869 #ifdef DEBUG_TZNAME
870 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
871 #endif
872 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
873 {
874 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
875 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
876 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
877 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
878 {
879 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
880 }
881 }
882 return nullptr;
883 }
884 #endif
885
886 #ifdef SEARCH_TZFILE
887 #define MAX_READ_SIZE 512
888
889 typedef struct DefaultTZInfo {
890 char* defaultTZBuffer;
891 int64_t defaultTZFileSize;
892 FILE* defaultTZFilePtr;
893 UBool defaultTZstatus;
894 int32_t defaultTZPosition;
895 } DefaultTZInfo;
896
897 /*
898 * This method compares the two files given to see if they are a match.
899 * It is currently use to compare two TZ files.
900 */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)901 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
902 FILE* file;
903 int64_t sizeFile;
904 int64_t sizeFileLeft;
905 int32_t sizeFileRead;
906 int32_t sizeFileToRead;
907 char bufferFile[MAX_READ_SIZE];
908 UBool result = true;
909
910 if (tzInfo->defaultTZFilePtr == nullptr) {
911 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
912 }
913 file = fopen(TZFileName, "r");
914
915 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
916
917 if (file != nullptr && tzInfo->defaultTZFilePtr != nullptr) {
918 /* First check that the file size are equal. */
919 if (tzInfo->defaultTZFileSize == 0) {
920 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
921 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
922 }
923 fseek(file, 0, SEEK_END);
924 sizeFile = ftell(file);
925 sizeFileLeft = sizeFile;
926
927 if (sizeFile != tzInfo->defaultTZFileSize) {
928 result = false;
929 } else {
930 /* Store the data from the files in separate buffers and
931 * compare each byte to determine equality.
932 */
933 if (tzInfo->defaultTZBuffer == nullptr) {
934 rewind(tzInfo->defaultTZFilePtr);
935 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
936 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
937 }
938 rewind(file);
939 while(sizeFileLeft > 0) {
940 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
941 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
942
943 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
944 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
945 result = false;
946 break;
947 }
948 sizeFileLeft -= sizeFileRead;
949 tzInfo->defaultTZPosition += sizeFileRead;
950 }
951 }
952 } else {
953 result = false;
954 }
955
956 if (file != nullptr) {
957 fclose(file);
958 }
959
960 return result;
961 }
962
963
964 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
965 #define SKIP1 "."
966 #define SKIP2 ".."
967 static UBool U_CALLCONV putil_cleanup();
968 static CharString *gSearchTZFileResult = nullptr;
969
970 /*
971 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
972 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
973 */
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)974 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
975 DIR* dirp = nullptr;
976 struct dirent* dirEntry = nullptr;
977 char* result = nullptr;
978 UErrorCode status = U_ZERO_ERROR;
979
980 /* Save the current path */
981 CharString curpath(path, -1, status);
982 if (U_FAILURE(status)) {
983 goto cleanupAndReturn;
984 }
985
986 dirp = opendir(path);
987 if (dirp == nullptr) {
988 goto cleanupAndReturn;
989 }
990
991 if (gSearchTZFileResult == nullptr) {
992 gSearchTZFileResult = new CharString;
993 if (gSearchTZFileResult == nullptr) {
994 goto cleanupAndReturn;
995 }
996 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
997 }
998
999 /* Check each entry in the directory. */
1000 while((dirEntry = readdir(dirp)) != nullptr) {
1001 const char* dirName = dirEntry->d_name;
1002 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
1003 && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
1004 /* Create a newpath with the new entry to test each entry in the directory. */
1005 CharString newpath(curpath, status);
1006 newpath.append(dirName, -1, status);
1007 if (U_FAILURE(status)) {
1008 break;
1009 }
1010
1011 DIR* subDirp = nullptr;
1012 if ((subDirp = opendir(newpath.data())) != nullptr) {
1013 /* If this new path is a directory, make a recursive call with the newpath. */
1014 closedir(subDirp);
1015 newpath.append('/', status);
1016 if (U_FAILURE(status)) {
1017 break;
1018 }
1019 result = searchForTZFile(newpath.data(), tzInfo);
1020 /*
1021 Have to get out here. Otherwise, we'd keep looking
1022 and return the first match in the top-level directory
1023 if there's a match in the top-level. If not, this function
1024 would return nullptr and set gTimeZoneBufferPtr to nullptr in initDefault().
1025 It worked without this in most cases because we have a fallback of calling
1026 localtime_r to figure out the default timezone.
1027 */
1028 if (result != nullptr)
1029 break;
1030 } else {
1031 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1032 int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1033 if (amountToSkip > newpath.length()) {
1034 amountToSkip = newpath.length();
1035 }
1036 const char* zoneid = newpath.data() + amountToSkip;
1037 skipZoneIDPrefix(&zoneid);
1038 gSearchTZFileResult->clear();
1039 gSearchTZFileResult->append(zoneid, -1, status);
1040 if (U_FAILURE(status)) {
1041 break;
1042 }
1043 result = gSearchTZFileResult->data();
1044 /* Get out after the first one found. */
1045 break;
1046 }
1047 }
1048 }
1049 }
1050
1051 cleanupAndReturn:
1052 if (dirp) {
1053 closedir(dirp);
1054 }
1055 return result;
1056 }
1057 #endif
1058
1059 #if U_PLATFORM == U_PF_ANDROID
1060 typedef int(system_property_read_callback)(const prop_info* info,
1061 void (*callback)(void* cookie,
1062 const char* name,
1063 const char* value,
1064 uint32_t serial),
1065 void* cookie);
1066 typedef int(system_property_get)(const char*, char*);
1067
1068 static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' };
1069
u_property_read(void * cookie,const char * name,const char * value,uint32_t serial)1070 static void u_property_read(void* cookie, const char* name, const char* value,
1071 uint32_t serial) {
1072 uprv_strcpy((char* )cookie, value);
1073 }
1074 #endif
1075
1076 U_CAPI void U_EXPORT2
uprv_tzname_clear_cache()1077 uprv_tzname_clear_cache()
1078 {
1079 #if U_PLATFORM == U_PF_ANDROID
1080 /* Android's timezone is stored in system property. */
1081 gAndroidTimeZone[0] = '\0';
1082 void* libc = dlopen("libc.so", RTLD_NOLOAD);
1083 if (libc) {
1084 /* Android API 26+ has new API to get system property and old API
1085 * (__system_property_get) is deprecated */
1086 system_property_read_callback* property_read_callback =
1087 (system_property_read_callback*)dlsym(
1088 libc, "__system_property_read_callback");
1089 if (property_read_callback) {
1090 const prop_info* info =
1091 __system_property_find("persist.sys.timezone");
1092 if (info) {
1093 property_read_callback(info, &u_property_read, gAndroidTimeZone);
1094 }
1095 } else {
1096 system_property_get* property_get =
1097 (system_property_get*)dlsym(libc, "__system_property_get");
1098 if (property_get) {
1099 property_get("persist.sys.timezone", gAndroidTimeZone);
1100 }
1101 }
1102 dlclose(libc);
1103 }
1104 #endif
1105
1106 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1107 gTimeZoneBufferPtr = nullptr;
1108 #endif
1109 }
1110
/*
 * Returns an identifier for the host's default time zone.
 *
 * n - index into U_TZNAME; only used on the final U_TZNAME fallback paths.
 *
 * Sources are tried in this order, depending on the build configuration:
 *  1. Windows-only builds: uprv_detectWindowsTimeZone(); if that yields
 *     nothing and U_TZNAME is not defined, a uprv_strdup("") copy.
 *  2. Other platforms (unless DEBUG_TZNAME is defined): the TZ environment
 *     variable, or gAndroidTimeZone on Android, when isValidOlsonID() accepts it.
 *  3. With CHECK_LOCALTIME_LINK: the cached gTimeZoneBufferPtr, else the target
 *     of the TZDEFAULT link (realpath, then readlink), else - with
 *     SEARCH_TZFILE - a search of TZZONEINFO for a file equal to TZDEFAULT.
 *  4. With U_TZNAME: the abbreviations remapped through remapShortTimeZone(),
 *     else U_TZNAME[n] itself.
 *  5. Otherwise the empty string.
 *
 * The returned pointer may refer to the environment, to file-scope buffers,
 * to static data, or (on the Windows paths) to heap memory.
 */
1111 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)1112 uprv_tzname(int n)
1113 {
1114 (void)n; // Avoid unreferenced parameter warning.
1115 const char *tzid = nullptr;
1116 #if U_PLATFORM_USES_ONLY_WIN32_API
1117 tzid = uprv_detectWindowsTimeZone();
1118
1119 if (tzid != nullptr) {
1120 return tzid;
1121 }
1122
1123 #ifndef U_TZNAME
1124 // The return value is free'd in timezone.cpp on Windows because
1125 // the other code path returns a pointer to a heap location.
1126 // If we don't have a name already, then tzname wouldn't be any
1127 // better, so just fall back.
1128 return uprv_strdup("");
1129 #endif // !U_TZNAME
1130
1131 #else
1132
1133 /*#if U_PLATFORM_IS_DARWIN_BASED
1134 int ret;
1135
1136 tzid = getenv("TZFILE");
1137 if (tzid != nullptr) {
1138 return tzid;
1139 }
1140 #endif*/
1141
1142 /* This code can be temporarily disabled to test tzname resolution later on. */
1143 #ifndef DEBUG_TZNAME
1144 #if U_PLATFORM == U_PF_ANDROID
1145 tzid = gAndroidTimeZone;
1146 #else
1147 tzid = getenv("TZ");
1148 #endif
1149 if (tzid != nullptr && isValidOlsonID(tzid)
1150 #if U_PLATFORM == U_PF_SOLARIS
1151 /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
1152 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1153 #endif
1154 ) {
1155 /* The colon forces tzset() to treat the remainder as zoneinfo path */
1156 if (tzid[0] == ':') {
1157 tzid++;
1158 }
1159 /* This might be a good Olson ID. */
1160 skipZoneIDPrefix(&tzid);
1161 return tzid;
1162 }
1163 /* else U_TZNAME will give a better result. */
1164 #endif
1165
1166 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1167 /* Caller must handle threading issues */
1168 if (gTimeZoneBufferPtr == nullptr) {
1169 /*
1170 This is a trick to look at the name of the link to get the Olson ID
1171 because the tzfile contents is underspecified.
1172 This isn't guaranteed to work because it may not be a symlink.
1173 */
1174 char *ret = realpath(TZDEFAULT, gTimeZoneBuffer);
/* realpath() returning TZDEFAULT unchanged means it did not resolve to another file. */
1175 if (ret != nullptr && uprv_strcmp(TZDEFAULT, gTimeZoneBuffer) != 0) {
1176 int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
1177 const char *tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
1178 // MacOS14 has the realpath as something like
1179 // /usr/share/zoneinfo.default/Australia/Melbourne
1180 // which will not have "/zoneinfo/" in the path.
1181 // Therefore if we fail, we fall back to read the link which is
1182 // /var/db/timezone/zoneinfo/Australia/Melbourne
1183 // We also fall back to reading the link if the realpath leads to something like
1184 // /usr/share/zoneinfo/posixrules
1185 if (tzZoneInfoTailPtr == nullptr ||
1186 uprv_strcmp(tzZoneInfoTailPtr + tzZoneInfoTailLen, "posixrules") == 0) {
/* readlink() does not NUL-terminate, hence the size-1 limit and the explicit terminator. */
1187 ssize_t size = readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
1188 if (size > 0) {
1189 gTimeZoneBuffer[size] = 0;
1190 tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
1191 }
1192 }
1193 if (tzZoneInfoTailPtr != nullptr) {
1194 tzZoneInfoTailPtr += tzZoneInfoTailLen;
1195 skipZoneIDPrefix(&tzZoneInfoTailPtr);
1196 if (isValidOlsonID(tzZoneInfoTailPtr)) {
/* Cache the ID (it points into gTimeZoneBuffer) for later calls. */
1197 return (gTimeZoneBufferPtr = tzZoneInfoTailPtr);
1198 }
1199 }
1200 } else {
1201 #if defined(SEARCH_TZFILE)
/* No usable link target: look for a zoneinfo file whose contents equal TZDEFAULT. */
1202 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1203 if (tzInfo != nullptr) {
1204 tzInfo->defaultTZBuffer = nullptr;
1205 tzInfo->defaultTZFileSize = 0;
1206 tzInfo->defaultTZFilePtr = nullptr;
1207 tzInfo->defaultTZstatus = false;
1208 tzInfo->defaultTZPosition = 0;
1209
1210 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1211
1212 /* Free previously allocated memory */
1213 if (tzInfo->defaultTZBuffer != nullptr) {
1214 uprv_free(tzInfo->defaultTZBuffer);
1215 }
1216 if (tzInfo->defaultTZFilePtr != nullptr) {
1217 fclose(tzInfo->defaultTZFilePtr);
1218 }
1219 uprv_free(tzInfo);
1220 }
1221
1222 if (gTimeZoneBufferPtr != nullptr && isValidOlsonID(gTimeZoneBufferPtr)) {
1223 return gTimeZoneBufferPtr;
1224 }
1225 #endif
1226 }
1227 }
1228 else {
/* A previous call already resolved the ID; reuse it. */
1229 return gTimeZoneBufferPtr;
1230 }
1231 #endif
1232 #endif
1233
1234 #ifdef U_TZNAME
1235 #if U_PLATFORM_USES_ONLY_WIN32_API
1236 /* The return value is free'd in timezone.cpp on Windows because
1237 * the other code path returns a pointer to a heap location. */
1238 return uprv_strdup(U_TZNAME[n]);
1239 #else
1240 /*
1241 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1242 So we remap the abbreviation to an olson ID.
1243
1244 Since Windows exposes a little more timezone information,
1245 we normally don't use this code on Windows because
1246 uprv_detectWindowsTimeZone should have already given the correct answer.
1247 */
1248 {
1249 struct tm juneSol, decemberSol;
1250 int daylightType;
1251 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1252 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1253
1254 /* This probing will tell us when daylight savings occurs. */
1255 localtime_r(&juneSolstice, &juneSol);
1256 localtime_r(&decemberSolstice, &decemberSol);
/* The December check comes first, so DST at both dates is classified as U_DAYLIGHT_DECEMBER. */
1257 if(decemberSol.tm_isdst > 0) {
1258 daylightType = U_DAYLIGHT_DECEMBER;
1259 } else if(juneSol.tm_isdst > 0) {
1260 daylightType = U_DAYLIGHT_JUNE;
1261 } else {
1262 daylightType = U_DAYLIGHT_NONE;
1263 }
1264 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1265 if (tzid != nullptr) {
1266 return tzid;
1267 }
1268 }
/* No table entry matched: return the raw abbreviation. */
1269 return U_TZNAME[n];
1270 #endif
1271 #else
1272 return "";
1273 #endif
1274 }
1275
1276 /* Get and set the ICU data directory --------------------------------------- */
1277
/* Guards the one-time run of dataDirectoryInitFn() from u_getDataDirectory(). */
1278 static icu::UInitOnce gDataDirInitOnce {};
/*
 * Current ICU data directory. It is either heap memory allocated by
 * u_setDataDirectory() or the string literal "" - which is why code that frees
 * it first checks that the string is non-empty.
 */
1279 static char *gDataDirectory = nullptr;
1280
/* Guards the one-time run of TimeZoneDataDirInitFn(). */
1281 UInitOnce gTimeZoneFilesInitOnce {};
/* Time zone files directory; allocated in TimeZoneDataDirInitFn(), deleted in putil_cleanup(). */
1282 static CharString *gTimeZoneFilesDirectory = nullptr;
1283
1284 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
/* Cached result of uprv_getDefaultLocaleID(). */
1285 static const char *gCorrectedPOSIXLocale = nullptr; /* Sometimes heap allocated */
/* True when gCorrectedPOSIXLocale points to heap memory that putil_cleanup() must free. */
1286 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
1287 #endif
1288
putil_cleanup()1289 static UBool U_CALLCONV putil_cleanup()
1290 {
1291 if (gDataDirectory && *gDataDirectory) {
1292 uprv_free(gDataDirectory);
1293 }
1294 gDataDirectory = nullptr;
1295 gDataDirInitOnce.reset();
1296
1297 delete gTimeZoneFilesDirectory;
1298 gTimeZoneFilesDirectory = nullptr;
1299 gTimeZoneFilesInitOnce.reset();
1300
1301 #ifdef SEARCH_TZFILE
1302 delete gSearchTZFileResult;
1303 gSearchTZFileResult = nullptr;
1304 #endif
1305
1306 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1307 if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
1308 uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
1309 gCorrectedPOSIXLocale = nullptr;
1310 gCorrectedPOSIXLocaleHeapAllocated = false;
1311 }
1312 #endif
1313 return true;
1314 }
1315
1316 /*
1317 * Set the data directory.
1318 * Make a copy of the passed string, and set the global data dir to point to it.
1319 */
1320 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1321 u_setDataDirectory(const char *directory) {
1322 char *newDataDir;
1323 int32_t length;
1324
1325 if(directory==nullptr || *directory==0) {
1326 /* A small optimization to prevent the malloc and copy when the
1327 shared library is used, and this is a way to make sure that nullptr
1328 is never returned.
1329 */
1330 newDataDir = (char *)"";
1331 }
1332 else {
1333 length=(int32_t)uprv_strlen(directory);
1334 newDataDir = (char *)uprv_malloc(length + 2);
1335 /* Exit out if newDataDir could not be created. */
1336 if (newDataDir == nullptr) {
1337 return;
1338 }
1339 uprv_strcpy(newDataDir, directory);
1340
1341 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1342 {
1343 char *p;
1344 while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != nullptr) {
1345 *p = U_FILE_SEP_CHAR;
1346 }
1347 }
1348 #endif
1349 }
1350
1351 if (gDataDirectory && *gDataDirectory) {
1352 uprv_free(gDataDirectory);
1353 }
1354 gDataDirectory = newDataDir;
1355 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1356 }
1357
1358 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1359 uprv_pathIsAbsolute(const char *path)
1360 {
1361 if(!path || !*path) {
1362 return false;
1363 }
1364
1365 if(*path == U_FILE_SEP_CHAR) {
1366 return true;
1367 }
1368
1369 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1370 if(*path == U_FILE_ALT_SEP_CHAR) {
1371 return true;
1372 }
1373 #endif
1374
1375 #if U_PLATFORM_USES_ONLY_WIN32_API
1376 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1377 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1378 path[1] == ':' ) {
1379 return true;
1380 }
1381 #endif
1382
1383 return false;
1384 }
1385
1386 /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1387 (needed for some Darwin ICU build environments) */
1388 #if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
1389 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1390 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1391 # endif
1392 #endif
1393
1394 #if defined(ICU_DATA_DIR_WINDOWS)
1395 // Helper function to get the ICU Data Directory under the Windows directory location.
getIcuDataDirectoryUnderWindowsDirectory(char * directoryBuffer,UINT bufferLength)1396 static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1397 {
1398 wchar_t windowsPath[MAX_PATH];
1399 char windowsPathUtf8[MAX_PATH];
1400
1401 UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1402 if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1403 // Convert UTF-16 to a UTF-8 string.
1404 UErrorCode status = U_ZERO_ERROR;
1405 int32_t windowsPathUtf8Len = 0;
1406 u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1407 &windowsPathUtf8Len, reinterpret_cast<const char16_t*>(windowsPath), -1, &status);
1408
1409 if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1410 (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1411 // Ensure it always has a separator, so we can append the ICU data path.
1412 if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1413 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1414 windowsPathUtf8[windowsPathUtf8Len] = '\0';
1415 }
1416 // Check if the concatenated string will fit.
1417 if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1418 uprv_strcpy(directoryBuffer, windowsPathUtf8);
1419 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1420 return true;
1421 }
1422 }
1423 }
1424
1425 return false;
1426 }
1427 #endif
1428
/*
 * One-time initializer for gDataDirectory, run through gDataDirInitOnce.
 *
 * Does nothing when u_setDataDirectory() has already stored a directory.
 * Otherwise the path is chosen in this order, subject to the build flags:
 *  1. the ICU_DATA environment variable;
 *  2. when that is unset or empty, the compile-time ICU_DATA_DIR (or
 *     U_ICU_DATA_DEFAULT_DIR), optionally prefixed with the value of the
 *     ICU_DATA_DIR_PREFIX_ENV_VAR environment variable;
 *  3. with ICU_DATA_DIR_WINDOWS, the directory under the Windows directory,
 *     which replaces any earlier choice whenever it can be determined;
 *  4. the empty string.
 * The result is stored through u_setDataDirectory().
 */
dataDirectoryInitFn()1429 static void U_CALLCONV dataDirectoryInitFn() {
1430 /* If we already have the directory, then return immediately. Will happen if user called
1431 * u_setDataDirectory().
1432 */
1433 if (gDataDirectory) {
1434 return;
1435 }
1436
1437 const char *path = nullptr;
1438 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1439 char datadir_path_buffer[PATH_MAX];
1440 #endif
1441
1442 /*
1443 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1444 override ICU's data with the ICU_DATA environment variable. This prevents
1445 problems where multiple custom copies of ICU's specific version of data
1446 are installed on a system. Either the application must define the data
1447 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1448 ICU, set the data with udata_setCommonData or trust that all of the
1449 required data is contained in ICU's data library that contains
1450 the entry point defined by U_ICUDATA_ENTRY_POINT.
1451
1452 There may also be some platforms where environment variables
1453 are not allowed.
1454 */
1455 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1456 /* First try to get the environment variable */
1457 # if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
1458 path=getenv("ICU_DATA");
1459 # endif
1460 # endif
1461
1462 /* ICU_DATA_DIR may be set as a compile option.
1463 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1464 * and is used only when data is built in archive mode eliminating the need
1465 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1466 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1467 * set their own path.
1468 */
1469 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1470 if(path==nullptr || *path==0) {
1471 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1472 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1473 # endif
1474 # ifdef ICU_DATA_DIR
1475 path=ICU_DATA_DIR;
1476 # else
1477 path=U_ICU_DATA_DEFAULT_DIR;
1478 # endif
1479 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1480 if (prefix != nullptr) {
/* snprintf() truncates to the buffer size; an over-long prefix+path is silently cut. */
1481 snprintf(datadir_path_buffer, sizeof(datadir_path_buffer), "%s%s", prefix, path);
1482 path=datadir_path_buffer;
1483 }
1484 # endif
1485 }
1486 #endif
1487
1488 #if defined(ICU_DATA_DIR_WINDOWS)
/* NOTE(review): this declares datadir_path_buffer again; it would clash with the
 * declaration above if ICU_DATA_DIR_PREFIX_ENV_VAR were defined too - presumably
 * the two options are never combined; confirm. */
1489 char datadir_path_buffer[MAX_PATH];
1490 if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1491 path = datadir_path_buffer;
1492 }
1493 #endif
1494
1495 if(path==nullptr) {
1496 /* It looks really bad, set it to something. */
1497 path = "";
1498 }
1499
/* u_setDataDirectory() copies the string, so pointing at the stack buffer is fine here. */
1500 u_setDataDirectory(path);
1501 return;
1502 }
1503
1504 U_CAPI const char * U_EXPORT2
u_getDataDirectory()1505 u_getDataDirectory() {
1506 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1507 return gDataDirectory;
1508 }
1509
setTimeZoneFilesDir(const char * path,UErrorCode & status)1510 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1511 if (U_FAILURE(status)) {
1512 return;
1513 }
1514 gTimeZoneFilesDirectory->clear();
1515 gTimeZoneFilesDirectory->append(path, status);
1516 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1517 char *p = gTimeZoneFilesDirectory->data();
1518 while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != nullptr) {
1519 *p = U_FILE_SEP_CHAR;
1520 }
1521 #endif
1522 }
1523
1524 #define TO_STRING(x) TO_STRING_2(x)
1525 #define TO_STRING_2(x) #x
1526
/*
 * One-time initializer for gTimeZoneFilesDirectory, run through
 * gTimeZoneFilesInitOnce.
 *
 * Registers putil_cleanup(), allocates the CharString (setting status to
 * U_MEMORY_ALLOCATION_ERROR on failure) and stores the initial directory:
 *  - on Windows UWP: the directory under the Windows directory when
 *    ICU_DATA_DIR_WINDOWS is defined and it can be determined;
 *  - elsewhere: the ICU_TIMEZONE_FILES_DIR environment variable;
 *  - when the result is nullptr: the build-time U_TIMEZONE_FILES_DIR, if defined;
 *  - otherwise the empty string.
 * With ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR defined, the value of that
 * environment variable is prepended to the chosen directory.
 */
TimeZoneDataDirInitFn(UErrorCode & status)1527 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1528 U_ASSERT(gTimeZoneFilesDirectory == nullptr);
1529 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1530 gTimeZoneFilesDirectory = new CharString();
1531 if (gTimeZoneFilesDirectory == nullptr) {
1532 status = U_MEMORY_ALLOCATION_ERROR;
1533 return;
1534 }
1535
1536 const char *dir = "";
1537
1538 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1539 char timezonefilesdir_path_buffer[PATH_MAX];
1540 const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
1541 #endif
1542
1543 #if U_PLATFORM_HAS_WINUWP_API == 1
1544 // The UWP version does not support the environment variable setting.
1545
1546 # if defined(ICU_DATA_DIR_WINDOWS)
1547 // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
1548 char datadir_path_buffer[MAX_PATH];
1549 if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1550 dir = datadir_path_buffer;
1551 }
1552 # endif
1553
1554 #else
1555 dir = getenv("ICU_TIMEZONE_FILES_DIR");
1556 #endif // U_PLATFORM_HAS_WINUWP_API
1557
1558 #if defined(U_TIMEZONE_FILES_DIR)
/* NOTE(review): on the UWP path dir is never nullptr (it starts as ""), so this
 * build-time fallback is only reachable on the getenv() path - confirm that is intended. */
1559 if (dir == nullptr) {
1560 // Build time configuration setting.
1561 dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1562 }
1563 #endif
1564
1565 if (dir == nullptr) {
1566 dir = "";
1567 }
1568
1569 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1570 if (prefix != nullptr) {
/* snprintf() truncates to the buffer size; an over-long prefix+dir is silently cut. */
1571 snprintf(timezonefilesdir_path_buffer, sizeof(timezonefilesdir_path_buffer), "%s%s", prefix, dir);
1572 dir = timezonefilesdir_path_buffer;
1573 }
1574 #endif
1575
/* The string is copied into gTimeZoneFilesDirectory, so stack buffers may go out of scope afterwards. */
1576 setTimeZoneFilesDir(dir, status);
1577 }
1578
1579
1580 U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode * status)1581 u_getTimeZoneFilesDirectory(UErrorCode *status) {
1582 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1583 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1584 }
1585
1586 U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char * path,UErrorCode * status)1587 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1588 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1589 setTimeZoneFilesDir(path, *status);
1590
1591 // Note: this function does some extra churn, first setting based on the
1592 // environment, then immediately replacing with the value passed in.
1593 // The logic is simpler that way, and performance shouldn't be an issue.
1594 }
1595
1596
1597 #if U_POSIX_LOCALE
1598 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1599 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1600 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * category - LC_MESSAGES or LC_CTYPE; for any other value no lookup is done
 *            and the default below is returned.
 *
 * Lookup order: setlocale(category, nullptr); when that yields nothing, "C"
 * or "POSIX": the LC_ALL environment variable, then LC_MESSAGES/LC_CTYPE
 * (matching the category), then LANG. On Solaris an empty variable is treated
 * like an unset one.
 *
 * Returns "en_US_POSIX" when the final result is null, "C" or "POSIX";
 * never returns nullptr.
1601 */
uprv_getPOSIXIDForCategory(int category)1602 static const char *uprv_getPOSIXIDForCategory(int category)
1603 {
1604 const char* posixID = nullptr;
1605 if (category == LC_MESSAGES || category == LC_CTYPE) {
1606 /*
1607 * On Solaris two different calls to setlocale can result in
1608 * different values. Only get this value once.
1609 *
1610 * We must check this first because an application can set this.
1611 *
1612 * LC_ALL can't be used because it's platform dependent. The LANG
1613 * environment variable seems to affect LC_CTYPE variable by default.
1614 * Here is what setlocale(LC_ALL, nullptr) can return.
1615 * HPUX can return 'C C C C C C C'
1616 * Solaris can return /en_US/C/C/C/C/C on the second try.
1617 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1618 *
1619 * The default codepage detection also needs to use LC_CTYPE.
1620 *
1621 * Do not call setlocale(LC_*, "")! Using an empty string instead
1622 * of nullptr, will modify the libc behavior.
1623 */
1624 posixID = setlocale(category, nullptr);
1625 if ((posixID == 0)
1626 || (uprv_strcmp("C", posixID) == 0)
1627 || (uprv_strcmp("POSIX", posixID) == 0))
1628 {
1629 /* Maybe we got some garbage. Try something more reasonable */
1630 posixID = getenv("LC_ALL");
1631 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1632 * This is needed to properly handle empty env. variables
1633 */
/* The two preprocessor branches below open the same pair of nested if-blocks;
 * the closing braces after the LANG lookup are shared by both. */
1634 #if U_PLATFORM == U_PF_SOLARIS
1635 if ((posixID == 0) || (posixID[0] == '\0')) {
1636 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1637 if ((posixID == 0) || (posixID[0] == '\0')) {
1638 #else
1639 if (posixID == 0) {
1640 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1641 if (posixID == 0) {
1642 #endif
1643 posixID = getenv("LANG");
1644 }
1645 }
1646 }
1647 }
1648 if ((posixID==0)
1649 || (uprv_strcmp("C", posixID) == 0)
1650 || (uprv_strcmp("POSIX", posixID) == 0))
1651 {
1652 /* Nothing worked. Give it a nice POSIX default value. */
1653 posixID = "en_US_POSIX";
1654 // Note: this test will not catch 'C.UTF-8',
1655 // that will be handled in uprv_getDefaultLocaleID().
1656 // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1657 // caller which expects to see "en_US_POSIX" in many branches.
1658 }
1659 return posixID;
1660 }
1661
1662 /* Return just the POSIX id for the default locale, whatever happens to be in
1663 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1664 */
1665 static const char *uprv_getPOSIXIDForDefaultLocale()
1666 {
1667 static const char* posixID = nullptr;
1668 if (posixID == 0) {
1669 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1670 }
1671 return posixID;
1672 }
1673
1674 #if !U_CHARSET_IS_UTF8
1675 /* Return just the POSIX id for the default codepage, whatever happens to be in
1676 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1677 */
1678 static const char *uprv_getPOSIXIDForDefaultCodepage()
1679 {
1680 static const char* posixID = nullptr;
1681 if (posixID == 0) {
1682 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1683 }
1684 return posixID;
1685 }
1686 #endif
1687 #endif
1688
1689 /* NOTE: The caller should handle thread safety */
1690 U_CAPI const char* U_EXPORT2
1691 uprv_getDefaultLocaleID()
1692 {
1693 #if U_POSIX_LOCALE
1694 /*
1695 Note that: (a '!' means the ID is improper somehow)
1696 LC_ALL ----> default_loc codepage
1697 --------------------------------------------------------
1698 ab.CD ab CD
1699 ab@CD ab__CD -
1700 ab@CD.EF ab__CD EF
1701
1702 ab_CD.EF@GH ab_CD_GH EF
1703
1704 Some 'improper' ways to do the same as above:
1705 ! ab_CD@GH.EF ab_CD_GH EF
1706 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1707 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1708
1709 _CD@GH _CD_GH -
1710 _CD.EF@GH _CD_GH EF
1711
1712 The variant cannot have dots in it.
1713 The 'rightmost' variant (@xxx) wins.
1714 The leftmost codepage (.xxx) wins.
1715 */
1716 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1717
1718 /* Format: (no spaces)
1719 ll [ _CC ] [ . MM ] [ @ VV]
1720
1721 l = lang, C = ctry, M = charmap, V = variant
1722 */
1723
1724 if (gCorrectedPOSIXLocale != nullptr) {
1725 return gCorrectedPOSIXLocale;
1726 }
1727
1728 // Copy the ID into owned memory.
1729 // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1730 char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1731 if (correctedPOSIXLocale == nullptr) {
1732 return nullptr;
1733 }
1734 uprv_strcpy(correctedPOSIXLocale, posixID);
1735
1736 char *limit;
1737 if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1738 *limit = 0;
1739 }
1740 if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1741 *limit = 0;
1742 }
1743
1744 if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1745 || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1746 // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1747 // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1748 uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1749 }
1750
1751 /* Note that we scan the *uncorrected* ID. */
1752 const char *p;
1753 if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1754 p++;
1755
1756 /* Take care of any special cases here.. */
1757 if (!uprv_strcmp(p, "nynorsk")) {
1758 p = "NY";
1759 /* Don't worry about no__NY. In practice, it won't appear. */
1760 }
1761
1762 if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1763 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1764 }
1765 else {
1766 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1767 }
1768
1769 const char *q;
1770 if ((q = uprv_strchr(p, '.')) != nullptr) {
1771 /* How big will the resulting string be? */
1772 int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1773 uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1774 correctedPOSIXLocale[len] = 0;
1775 }
1776 else {
1777 /* Anything following the @ sign */
1778 uprv_strcat(correctedPOSIXLocale, p);
1779 }
1780
1781 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1782 * How about 'russian' -> 'ru'?
1783 * Many of the other locales using ISO codes will be handled by the
1784 * canonicalization functions in uloc_getDefault.
1785 */
1786 }
1787
1788 if (gCorrectedPOSIXLocale == nullptr) {
1789 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1790 gCorrectedPOSIXLocaleHeapAllocated = true;
1791 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1792 correctedPOSIXLocale = nullptr;
1793 }
1794 posixID = gCorrectedPOSIXLocale;
1795
1796 if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */
1797 uprv_free(correctedPOSIXLocale);
1798 }
1799
1800 return posixID;
1801
1802 #elif U_PLATFORM_USES_ONLY_WIN32_API
1803 #define POSIX_LOCALE_CAPACITY 64
1804 UErrorCode status = U_ZERO_ERROR;
1805 char *correctedPOSIXLocale = nullptr;
1806
1807 // If we have already figured this out just use the cached value
1808 if (gCorrectedPOSIXLocale != nullptr) {
1809 return gCorrectedPOSIXLocale;
1810 }
1811
1812 // No cached value, need to determine the current value
1813 static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1814 int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1815
1816 // Now we should have a Windows locale name that needs converted to the POSIX style.
1817 if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1818 {
1819 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1820 char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1821
1822 int32_t i;
1823 for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1824 {
1825 if (windowsLocale[i] == '_')
1826 {
1827 modifiedWindowsLocale[i] = '-';
1828 }
1829 else
1830 {
1831 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1832 }
1833
1834 if (modifiedWindowsLocale[i] == '\0')
1835 {
1836 break;
1837 }
1838 }
1839
1840 if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1841 {
1842 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1843 // locale when tags are dropped
1844 modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1845 }
1846
1847 // Now normalize the resulting name
1848 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1849 /* TODO: Should we just exit on memory allocation failure? */
1850 if (correctedPOSIXLocale)
1851 {
1852 int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1853 if (U_SUCCESS(status))
1854 {
1855 *(correctedPOSIXLocale + posixLen) = 0;
1856 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1857 gCorrectedPOSIXLocaleHeapAllocated = true;
1858 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1859 }
1860 else
1861 {
1862 uprv_free(correctedPOSIXLocale);
1863 }
1864 }
1865 }
1866
1867 // If unable to find a locale we can agree upon, use en-US by default
1868 if (gCorrectedPOSIXLocale == nullptr) {
1869 gCorrectedPOSIXLocale = "en_US";
1870 }
1871 return gCorrectedPOSIXLocale;
1872
1873 #elif U_PLATFORM == U_PF_OS400
1874 /* locales are process scoped and are by definition thread safe */
1875 static char correctedLocale[64];
1876 const char *localeID = getenv("LC_ALL");
1877 char *p;
1878
1879 if (localeID == nullptr)
1880 localeID = getenv("LANG");
1881 if (localeID == nullptr)
1882 localeID = setlocale(LC_ALL, nullptr);
1883 /* Make sure we have something... */
1884 if (localeID == nullptr)
1885 return "en_US_POSIX";
1886
1887 /* Extract the locale name from the path. */
1888 if((p = uprv_strrchr(localeID, '/')) != nullptr)
1889 {
1890 /* Increment p to start of locale name. */
1891 p++;
1892 localeID = p;
1893 }
1894
1895 /* Copy to work location. */
1896 uprv_strcpy(correctedLocale, localeID);
1897
1898 /* Strip off the '.locale' extension. */
1899 if((p = uprv_strchr(correctedLocale, '.')) != nullptr) {
1900 *p = 0;
1901 }
1902
1903 /* Upper case the locale name. */
1904 T_CString_toUpperCase(correctedLocale);
1905
1906 /* See if we are using the POSIX locale. Any of the
1907 * following are equivalent and use the same QLGPGCMA
1908 * (POSIX) locale.
1909 * QLGPGCMA2 means UCS2
1910 * QLGPGCMA_4 means UTF-32
1911 * QLGPGCMA_8 means UTF-8
1912 */
1913 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1914 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1915 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1916 {
1917 uprv_strcpy(correctedLocale, "en_US_POSIX");
1918 }
1919 else
1920 {
1921 int16_t LocaleLen;
1922
1923 /* Lower case the lang portion. */
1924 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1925 {
1926 *p = uprv_tolower(*p);
1927 }
1928
1929 /* Adjust for Euro. After '_E' add 'URO'. */
1930 LocaleLen = uprv_strlen(correctedLocale);
1931 if (correctedLocale[LocaleLen - 2] == '_' &&
1932 correctedLocale[LocaleLen - 1] == 'E')
1933 {
1934 uprv_strcat(correctedLocale, "URO");
1935 }
1936
1937 /* If using Lotus-based locale then convert to
1938 * equivalent non Lotus.
1939 */
1940 else if (correctedLocale[LocaleLen - 2] == '_' &&
1941 correctedLocale[LocaleLen - 1] == 'L')
1942 {
1943 correctedLocale[LocaleLen - 2] = 0;
1944 }
1945
1946 /* There are separate simplified and traditional
1947 * locales called zh_HK_S and zh_HK_T.
1948 */
1949 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1950 {
1951 uprv_strcpy(correctedLocale, "zh_HK");
1952 }
1953
1954 /* A special zh_CN_GBK locale...
1955 */
1956 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1957 {
1958 uprv_strcpy(correctedLocale, "zh_CN");
1959 }
1960
1961 }
1962
1963 return correctedLocale;
1964 #endif
1965
1966 }
1967
1968 #if !U_CHARSET_IS_UTF8
1969 #if U_POSIX_LOCALE
1970 /*
1971 Due to various platform differences, one platform may specify a charset,
1972 when they really mean a different charset. Remap the names so that they are
1973 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1974 here. Before adding anything to this function, please consider adding unique
1975 names to the ICU alias table in the data directory.
1976 */
1977 static const char*
1978 remapPlatformDependentCodepage(const char *locale, const char *name) {
1979 if (locale != nullptr && *locale == 0) {
1980 /* Make sure that an empty locale is handled the same way. */
1981 locale = nullptr;
1982 }
1983 if (name == nullptr) {
1984 return nullptr;
1985 }
1986 #if U_PLATFORM == U_PF_AIX
1987 if (uprv_strcmp(name, "IBM-943") == 0) {
1988 /* Use the ASCII compatible ibm-943 */
1989 name = "Shift-JIS";
1990 }
1991 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1992 /* Use the windows-1252 that contains the Euro */
1993 name = "IBM-5348";
1994 }
1995 #elif U_PLATFORM == U_PF_SOLARIS
1996 if (locale != nullptr && uprv_strcmp(name, "EUC") == 0) {
1997 /* Solaris underspecifies the "EUC" name. */
1998 if (uprv_strcmp(locale, "zh_CN") == 0) {
1999 name = "EUC-CN";
2000 }
2001 else if (uprv_strcmp(locale, "zh_TW") == 0) {
2002 name = "EUC-TW";
2003 }
2004 else if (uprv_strcmp(locale, "ko_KR") == 0) {
2005 name = "EUC-KR";
2006 }
2007 }
2008 else if (uprv_strcmp(name, "eucJP") == 0) {
2009 /*
2010 ibm-954 is the best match.
2011 ibm-33722 is the default for eucJP (similar to Windows).
2012 */
2013 name = "eucjis";
2014 }
2015 else if (uprv_strcmp(name, "646") == 0) {
2016 /*
2017 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
2018 * ISO-8859-1 instead of US-ASCII(646).
2019 */
2020 name = "ISO-8859-1";
2021 }
2022 #elif U_PLATFORM_IS_DARWIN_BASED
2023 if (locale == nullptr && *name == 0) {
2024 /*
2025 No locale was specified, and an empty name was passed in.
2026 This usually indicates that nl_langinfo didn't return valid information.
2027 Mac OS X uses UTF-8 by default (especially the locale data and console).
2028 */
2029 name = "UTF-8";
2030 }
2031 else if (uprv_strcmp(name, "CP949") == 0) {
2032 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2033 name = "EUC-KR";
2034 }
2035 else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2036 /*
2037 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2038 */
2039 name = "UTF-8";
2040 }
2041 #elif U_PLATFORM == U_PF_BSD
2042 if (uprv_strcmp(name, "CP949") == 0) {
2043 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2044 name = "EUC-KR";
2045 }
2046 #elif U_PLATFORM == U_PF_HPUX
2047 if (locale != nullptr && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2048 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2049 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2050 name = "hkbig5";
2051 }
2052 else if (uprv_strcmp(name, "eucJP") == 0) {
2053 /*
2054 ibm-1350 is the best match, but unavailable.
2055 ibm-954 is mostly a superset of ibm-1350.
2056 ibm-33722 is the default for eucJP (similar to Windows).
2057 */
2058 name = "eucjis";
2059 }
2060 #elif U_PLATFORM == U_PF_LINUX
2061 if (locale != nullptr && uprv_strcmp(name, "euc") == 0) {
2062 /* Linux underspecifies the "EUC" name. */
2063 if (uprv_strcmp(locale, "korean") == 0) {
2064 name = "EUC-KR";
2065 }
2066 else if (uprv_strcmp(locale, "japanese") == 0) {
2067 /* See comment below about eucJP */
2068 name = "eucjis";
2069 }
2070 }
2071 else if (uprv_strcmp(name, "eucjp") == 0) {
2072 /*
2073 ibm-1350 is the best match, but unavailable.
2074 ibm-954 is mostly a superset of ibm-1350.
2075 ibm-33722 is the default for eucJP (similar to Windows).
2076 */
2077 name = "eucjis";
2078 }
2079 else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2080 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2081 /*
2082 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2083 */
2084 name = "UTF-8";
2085 }
2086 /*
2087 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2088 * it by falling back to 'US-ASCII' when nullptr is returned from this
2089 * function. So, we don't have to worry about it here.
2090 */
2091 #endif
2092 /* return nullptr when "" is passed in */
2093 if (*name == 0) {
2094 name = nullptr;
2095 }
2096 return name;
2097 }
2098
2099 static const char*
2100 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2101 {
2102 char localeBuf[100];
2103 const char *name = nullptr;
2104 char *variant = nullptr;
2105
2106 if (localeName != nullptr && (name = (uprv_strchr(localeName, '.'))) != nullptr) {
2107 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2108 uprv_strncpy(localeBuf, localeName, localeCapacity);
2109 localeBuf[localeCapacity-1] = 0; /* ensure NUL termination */
2110 name = uprv_strncpy(buffer, name+1, buffCapacity);
2111 buffer[buffCapacity-1] = 0; /* ensure NUL termination */
2112 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != nullptr) {
2113 *variant = 0;
2114 }
2115 name = remapPlatformDependentCodepage(localeBuf, name);
2116 }
2117 return name;
2118 }
2119 #endif
2120
2121 static const char*
2122 int_getDefaultCodepage()
2123 {
2124 #if U_PLATFORM == U_PF_OS400
2125 uint32_t ccsid = 37; /* Default to ibm-37 */
2126 static char codepage[64];
2127 Qwc_JOBI0400_t jobinfo;
2128 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2129
2130 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2131 "* ", " ", &error);
2132
2133 if (error.Bytes_Available == 0) {
2134 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2135 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2136 }
2137 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2138 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2139 }
2140 /* else use the default */
2141 }
2142 snprintf(codepage, sizeof(codepage), "ibm-%d", ccsid);
2143 return codepage;
2144
2145 #elif U_PLATFORM == U_PF_OS390
2146 static char codepage[64];
2147
2148 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2149 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
2150 codepage[63] = 0; /* NUL terminate */
2151
2152 return codepage;
2153
2154 #elif U_PLATFORM_USES_ONLY_WIN32_API
2155 static char codepage[64];
2156 DWORD codepageNumber = 0;
2157
2158 #if U_PLATFORM_HAS_WINUWP_API == 1
2159 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2160 // have folks use Unicode than a "system" code page, however this is the same
2161 // codepage as the system default locale codepage. (FWIW, the system locale is
2162 // ONLY used for codepage, it should never be used for anything else)
2163 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2164 (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2165 #else
2166 // Win32 apps can call GetACP
2167 codepageNumber = GetACP();
2168 #endif
2169 // Special case for UTF-8
2170 if (codepageNumber == 65001)
2171 {
2172 return "UTF-8";
2173 }
2174 // Windows codepages can look like windows-1252, so format the found number
2175 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2176 // are between 3 and 19999
2177 if (codepageNumber > 0 && codepageNumber < 20000)
2178 {
2179 snprintf(codepage, sizeof(codepage), "windows-%ld", codepageNumber);
2180 return codepage;
2181 }
2182 // If the codepage number call failed then return UTF-8
2183 return "UTF-8";
2184
2185 #elif U_POSIX_LOCALE
2186 static char codesetName[100];
2187 const char *localeName = nullptr;
2188 const char *name = nullptr;
2189
2190 localeName = uprv_getPOSIXIDForDefaultCodepage();
2191 uprv_memset(codesetName, 0, sizeof(codesetName));
2192 /* On Solaris nl_langinfo returns C locale values unless setlocale
2193 * was called earlier.
2194 */
2195 #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2196 /* When available, check nl_langinfo first because it usually gives more
2197 useful names. It depends on LC_CTYPE.
2198 nl_langinfo may use the same buffer as setlocale. */
2199 {
2200 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
2201 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2202 /*
2203 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2204 * instead of ASCII.
2205 */
2206 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2207 codeset = remapPlatformDependentCodepage(localeName, codeset);
2208 } else
2209 #endif
2210 {
2211 codeset = remapPlatformDependentCodepage(nullptr, codeset);
2212 }
2213
2214 if (codeset != nullptr) {
2215 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2216 codesetName[sizeof(codesetName)-1] = 0;
2217 return codesetName;
2218 }
2219 }
2220 #endif
2221
2222 /* Use setlocale in a nice way, and then check some environment variables.
2223 Maybe the application used setlocale already.
2224 */
2225 uprv_memset(codesetName, 0, sizeof(codesetName));
2226 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2227 if (name) {
2228 /* if we can find the codeset name from setlocale, return that. */
2229 return name;
2230 }
2231
2232 if (*codesetName == 0)
2233 {
2234 /* Everything failed. Return US ASCII (ISO 646). */
2235 (void)uprv_strcpy(codesetName, "US-ASCII");
2236 }
2237 return codesetName;
2238 #else
2239 return "US-ASCII";
2240 #endif
2241 }
2242
2243
2244 U_CAPI const char* U_EXPORT2
2245 uprv_getDefaultCodepage()
2246 {
2247 static char const *name = nullptr;
2248 umtx_lock(nullptr);
2249 if (name == nullptr) {
2250 name = int_getDefaultCodepage();
2251 }
2252 umtx_unlock(nullptr);
2253 return name;
2254 }
2255 #endif /* !U_CHARSET_IS_UTF8 */
2256
2257
2258 /* end of platform-specific implementation -------------- */
2259
2260 /* version handling --------------------------------------------------------- */
2261
2262 U_CAPI void U_EXPORT2
2263 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2264 char *end;
2265 uint16_t part=0;
2266
2267 if(versionArray==nullptr) {
2268 return;
2269 }
2270
2271 if(versionString!=nullptr) {
2272 for(;;) {
2273 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2274 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2275 break;
2276 }
2277 versionString=end+1;
2278 }
2279 }
2280
2281 while(part<U_MAX_VERSION_LENGTH) {
2282 versionArray[part++]=0;
2283 }
2284 }
2285
2286 U_CAPI void U_EXPORT2
2287 u_versionFromUString(UVersionInfo versionArray, const char16_t *versionString) {
2288 if(versionArray!=nullptr && versionString!=nullptr) {
2289 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2290 int32_t len = u_strlen(versionString);
2291 if(len>U_MAX_VERSION_STRING_LENGTH) {
2292 len = U_MAX_VERSION_STRING_LENGTH;
2293 }
2294 u_UCharsToChars(versionString, versionChars, len);
2295 versionChars[len]=0;
2296 u_versionFromString(versionArray, versionChars);
2297 }
2298 }
2299
2300 U_CAPI void U_EXPORT2
2301 u_versionToString(const UVersionInfo versionArray, char *versionString) {
2302 uint16_t count, part;
2303 uint8_t field;
2304
2305 if(versionString==nullptr) {
2306 return;
2307 }
2308
2309 if(versionArray==nullptr) {
2310 versionString[0]=0;
2311 return;
2312 }
2313
2314 /* count how many fields need to be written */
2315 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2316 }
2317
2318 if(count <= 1) {
2319 count = 2;
2320 }
2321
2322 /* write the first part */
2323 /* write the decimal field value */
2324 field=versionArray[0];
2325 if(field>=100) {
2326 *versionString++=(char)('0'+field/100);
2327 field%=100;
2328 }
2329 if(field>=10) {
2330 *versionString++=(char)('0'+field/10);
2331 field%=10;
2332 }
2333 *versionString++=(char)('0'+field);
2334
2335 /* write the following parts */
2336 for(part=1; part<count; ++part) {
2337 /* write a dot first */
2338 *versionString++=U_VERSION_DELIMITER;
2339
2340 /* write the decimal field value */
2341 field=versionArray[part];
2342 if(field>=100) {
2343 *versionString++=(char)('0'+field/100);
2344 field%=100;
2345 }
2346 if(field>=10) {
2347 *versionString++=(char)('0'+field/10);
2348 field%=10;
2349 }
2350 *versionString++=(char)('0'+field);
2351 }
2352
2353 /* NUL-terminate */
2354 *versionString=0;
2355 }
2356
2357 U_CAPI void U_EXPORT2
2358 u_getVersion(UVersionInfo versionArray) {
2359 (void)copyright; // Suppress unused variable warning from clang.
2360 u_versionFromString(versionArray, U_ICU_VERSION);
2361 }
2362
2363 /**
2364 * icucfg.h dependent code
2365 */
2366
2367 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2368
2369 #if HAVE_DLFCN_H
2370 #ifdef __MVS__
2371 #ifndef __SUSV3
2372 #define __SUSV3 1
2373 #endif
2374 #endif
2375 #include <dlfcn.h>
2376 #endif /* HAVE_DLFCN_H */
2377
2378 U_CAPI void * U_EXPORT2
2379 uprv_dl_open(const char *libName, UErrorCode *status) {
2380 void *ret = nullptr;
2381 if(U_FAILURE(*status)) return ret;
2382 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2383 if(ret==nullptr) {
2384 #ifdef U_TRACE_DYLOAD
2385 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2386 #endif
2387 *status = U_MISSING_RESOURCE_ERROR;
2388 }
2389 return ret;
2390 }
2391
2392 U_CAPI void U_EXPORT2
2393 uprv_dl_close(void *lib, UErrorCode *status) {
2394 if(U_FAILURE(*status)) return;
2395 dlclose(lib);
2396 }
2397
2398 U_CAPI UVoidFunction* U_EXPORT2
2399 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2400 union {
2401 UVoidFunction *fp;
2402 void *vp;
2403 } uret;
2404 uret.fp = nullptr;
2405 if(U_FAILURE(*status)) return uret.fp;
2406 uret.vp = dlsym(lib, sym);
2407 if(uret.vp == nullptr) {
2408 #ifdef U_TRACE_DYLOAD
2409 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2410 #endif
2411 *status = U_MISSING_RESOURCE_ERROR;
2412 }
2413 return uret.fp;
2414 }
2415
2416 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2417
2418 /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2420
2421 U_CAPI void * U_EXPORT2
2422 uprv_dl_open(const char *libName, UErrorCode *status) {
2423 HMODULE lib = nullptr;
2424
2425 if(U_FAILURE(*status)) return nullptr;
2426
2427 lib = LoadLibraryA(libName);
2428
2429 if(lib==nullptr) {
2430 *status = U_MISSING_RESOURCE_ERROR;
2431 }
2432
2433 return (void*)lib;
2434 }
2435
2436 U_CAPI void U_EXPORT2
2437 uprv_dl_close(void *lib, UErrorCode *status) {
2438 HMODULE handle = (HMODULE)lib;
2439 if(U_FAILURE(*status)) return;
2440
2441 FreeLibrary(handle);
2442
2443 return;
2444 }
2445
2446 U_CAPI UVoidFunction* U_EXPORT2
2447 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2448 HMODULE handle = (HMODULE)lib;
2449 UVoidFunction* addr = nullptr;
2450
2451 if(U_FAILURE(*status) || lib==nullptr) return nullptr;
2452
2453 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2454
2455 if(addr==nullptr) {
2456 DWORD lastError = GetLastError();
2457 if(lastError == ERROR_PROC_NOT_FOUND) {
2458 *status = U_MISSING_RESOURCE_ERROR;
2459 } else {
2460 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2461 }
2462 }
2463
2464 return addr;
2465 }
2466
2467 #else
2468
2469 /* No dynamic loading, null (nonexistent) implementation. */
2470
2471 U_CAPI void * U_EXPORT2
2472 uprv_dl_open(const char *libName, UErrorCode *status) {
2473 (void)libName;
2474 if(U_FAILURE(*status)) return nullptr;
2475 *status = U_UNSUPPORTED_ERROR;
2476 return nullptr;
2477 }
2478
2479 U_CAPI void U_EXPORT2
2480 uprv_dl_close(void *lib, UErrorCode *status) {
2481 (void)lib;
2482 if(U_FAILURE(*status)) return;
2483 *status = U_UNSUPPORTED_ERROR;
2484 return;
2485 }
2486
2487 U_CAPI UVoidFunction* U_EXPORT2
2488 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2489 (void)lib;
2490 (void)sym;
2491 if(U_SUCCESS(*status)) {
2492 *status = U_UNSUPPORTED_ERROR;
2493 }
2494 return (UVoidFunction*)nullptr;
2495 }
2496
2497 #endif
2498
2499 /*
2500 * Hey, Emacs, please set the following:
2501 *
2502 * Local Variables:
2503 * indent-tabs-mode: nil
2504 * End:
2505 *
2506 */
2507