1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
38 */
39
40 // Defines _XOPEN_SOURCE for access to POSIX functions.
41 // Must be before any other #includes.
42 #include "uposixdefs.h"
43
44 /* include ICU headers */
45 #include "unicode/utypes.h"
46 #include "unicode/putil.h"
47 #include "unicode/ustring.h"
48 #include "putilimp.h"
49 #include "uassert.h"
50 #include "umutex.h"
51 #include "cmemory.h"
52 #include "cstring.h"
53 #include "locmap.h"
54 #include "ucln_cmn.h"
55 #include "charstr.h"
56
57 /* Include standard headers. */
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <math.h>
62 #include <locale.h>
63 #include <float.h>
64
65 #ifndef U_COMMON_IMPLEMENTATION
66 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
67 #endif
68
69
70 /* include system headers */
71 #if U_PLATFORM_USES_ONLY_WIN32_API
72 /*
73 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
74 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
75 * to use native APIs as much as possible?
76 */
77 # define WIN32_LEAN_AND_MEAN
78 # define VC_EXTRALEAN
79 # define NOUSER
80 # define NOSERVICE
81 # define NOIME
82 # define NOMCX
83 # include <windows.h>
84 # include "wintz.h"
85 #elif U_PLATFORM == U_PF_OS400
86 # include <float.h>
87 # include <qusec.h> /* error code structure */
88 # include <qusrjobi.h>
89 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
90 # include <mih/testptr.h> /* For uprv_maximumPtr */
91 #elif U_PLATFORM == U_PF_OS390
92 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
93 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
94 # include <limits.h>
95 # include <unistd.h>
96 # if U_PLATFORM == U_PF_SOLARIS
97 # ifndef _XPG4_2
98 # define _XPG4_2
99 # endif
100 # endif
101 #elif U_PLATFORM == U_PF_QNX
102 # include <sys/neutrino.h>
103 #endif
104
105 #if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
106 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
107 #undef __STRICT_ANSI__
108 #endif
109
/*
 * Cygwin with GCC requires time.h to be included after strict ANSI mode has been disabled above.
 */
113 #include <time.h>
114
115 #if !U_PLATFORM_USES_ONLY_WIN32_API
116 #include <sys/time.h>
117 #endif
118
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
124
125 #if U_HAVE_NL_LANGINFO_CODESET
126 #include <langinfo.h>
127 #endif
128
129 /**
130 * Simple things (presence of functions, etc) should just go in configure.in and be added to
131 * icucfg.h via autoheader.
132 */
133 #if U_PLATFORM_IMPLEMENTS_POSIX
134 # if U_PLATFORM == U_PF_OS400
135 # define HAVE_DLFCN_H 0
136 # define HAVE_DLOPEN 0
137 # else
138 # ifndef HAVE_DLFCN_H
139 # define HAVE_DLFCN_H 1
140 # endif
141 # ifndef HAVE_DLOPEN
142 # define HAVE_DLOPEN 1
143 # endif
144 # endif
145 # ifndef HAVE_GETTIMEOFDAY
146 # define HAVE_GETTIMEOFDAY 1
147 # endif
148 #else
149 # define HAVE_DLFCN_H 0
150 # define HAVE_DLOPEN 0
151 # define HAVE_GETTIMEOFDAY 0
152 #endif
153
154 U_NAMESPACE_USE
155
156 /* Define the extension for data files, again... */
157 #define DATA_TYPE "dat"
158
159 /* Leave this copyright notice here! */
160 static const char copyright[] = U_COPYRIGHT_STRING;
161
162 /* floating point implementations ------------------------------------------- */
163
/* Mask selecting the sign bit of a 32-bit word. */
#define SIGN 0x80000000U

/*
 * Overlay of a 64-bit integer and a double, so that double constants can be
 * written down as raw bit patterns.
 */
typedef union {
    int64_t i64; /* Must stay the first member: C89 brace initialization sets the first union member. */
    double d64;
} BitPatternConversion;

/* The NaN pattern used here is a quiet NaN (QNAN) rather than a signaling one (SNAN). */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity. */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
174
175 /*---------------------------------------------------------------------------
176 Platform utilities
177 Our general strategy is to assume we're on a POSIX platform. Platforms which
178 are non-POSIX must declare themselves so. The default POSIX implementation
179 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
180 functions).
181 ---------------------------------------------------------------------------*/
182
183 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
184 # undef U_POSIX_LOCALE
185 #else
186 # define U_POSIX_LOCALE 1
187 #endif
188
189 /*
190 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
191 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
192 */
193 #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of the double at d.
 * On big-endian builds those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
203
/*
 * Returns a pointer to the n least significant bytes of the double at d.
 * On big-endian builds those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
213 #endif /* !IEEE_754 */
214
215 #if IEEE_754
216 static UBool
u_signBit(double d)217 u_signBit(double d) {
218 uint8_t hiByte;
219 #if U_IS_BIG_ENDIAN
220 hiByte = *(uint8_t *)&d;
221 #else
222 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
223 #endif
224 return (hiByte & 0x80) != 0;
225 }
226 #endif
227
228
229
230 #if defined (U_DEBUG_FAKETIME)
231 /* Override the clock to test things without having to move the system clock.
232 * Assumes POSIX gettimeofday() will function
233 */
234 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
235 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
236 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
237 static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
238
getUTCtime_real()239 static UDate getUTCtime_real() {
240 struct timeval posixTime;
241 gettimeofday(&posixTime, NULL);
242 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
243 }
244
getUTCtime_fake()245 static UDate getUTCtime_fake() {
246 umtx_lock(&fakeClockMutex);
247 if(!fakeClock_set) {
248 UDate real = getUTCtime_real();
249 const char *fake_start = getenv("U_FAKETIME_START");
250 if((fake_start!=NULL) && (fake_start[0]!=0)) {
251 sscanf(fake_start,"%lf",&fakeClock_t0);
252 fakeClock_dt = fakeClock_t0 - real;
253 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
254 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
255 fakeClock_t0, fake_start, fakeClock_dt, real);
256 } else {
257 fakeClock_dt = 0;
258 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
259 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
260 }
261 fakeClock_set = TRUE;
262 }
263 umtx_unlock(&fakeClockMutex);
264
265 return getUTCtime_real() + fakeClock_dt;
266 }
267 #endif
268
269 #if U_PLATFORM_USES_ONLY_WIN32_API
/*
 * Overlay of a Win32 FILETIME and a 64-bit integer, so that the value filled
 * in through fileTime can be read back as one number through int64.
 */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion; /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND 10000
278
279 #endif
280
281 /*---------------------------------------------------------------------------
282 Universal Implementations
283 These are designed to work on all platforms. Try these, and if they
284 don't work on your platform, then special case your platform with new
285 implementations.
286 ---------------------------------------------------------------------------*/
287
288 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()289 uprv_getUTCtime()
290 {
291 #if defined(U_DEBUG_FAKETIME)
292 return getUTCtime_fake(); /* Hook for overriding the clock */
293 #else
294 return uprv_getRawUTCtime();
295 #endif
296 }
297
298 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
299 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()300 uprv_getRawUTCtime()
301 {
302 #if U_PLATFORM_USES_ONLY_WIN32_API
303
304 FileTimeConversion winTime;
305 GetSystemTimeAsFileTime(&winTime.fileTime);
306 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
307 #else
308
309 #if HAVE_GETTIMEOFDAY
310 struct timeval posixTime;
311 gettimeofday(&posixTime, NULL);
312 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
313 #else
314 time_t epochtime;
315 time(&epochtime);
316 return (UDate)epochtime * U_MILLIS_PER_SECOND;
317 #endif
318
319 #endif
320 }
321
322 /*-----------------------------------------------------------------------------
323 IEEE 754
324 These methods detect and return NaN and infinity values for doubles
325 conforming to IEEE 754. Platforms which support this standard include X86,
326 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
327 If this doesn't work on your platform, you have non-IEEE floating-point, and
328 will need to code your own versions. A naive implementation is to return 0.0
329 for getNaN and getInfinity, and false for isNaN and isInfinite.
330 ---------------------------------------------------------------------------*/
331
332 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)333 uprv_isNaN(double number)
334 {
335 #if IEEE_754
336 BitPatternConversion convertedNumber;
337 convertedNumber.d64 = number;
338 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
339 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
340
341 #elif U_PLATFORM == U_PF_OS390
342 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
343 sizeof(uint32_t));
344 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
345 sizeof(uint32_t));
346
347 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
348 (lowBits == 0x00000000L);
349
350 #else
351 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
352 /* you'll need to replace this default implementation with what's correct*/
353 /* for your platform.*/
354 return number != number;
355 #endif
356 }
357
358 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)359 uprv_isInfinite(double number)
360 {
361 #if IEEE_754
362 BitPatternConversion convertedNumber;
363 convertedNumber.d64 = number;
364 /* Infinity is exactly 0x7FF0000000000000U. */
365 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
366 #elif U_PLATFORM == U_PF_OS390
367 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
368 sizeof(uint32_t));
369 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
370 sizeof(uint32_t));
371
372 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
373
374 #else
375 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
376 /* value, you'll need to replace this default implementation with what's*/
377 /* correct for your platform.*/
378 return number == (2.0 * number);
379 #endif
380 }
381
382 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)383 uprv_isPositiveInfinity(double number)
384 {
385 #if IEEE_754 || U_PLATFORM == U_PF_OS390
386 return (UBool)(number > 0 && uprv_isInfinite(number));
387 #else
388 return uprv_isInfinite(number);
389 #endif
390 }
391
392 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)393 uprv_isNegativeInfinity(double number)
394 {
395 #if IEEE_754 || U_PLATFORM == U_PF_OS390
396 return (UBool)(number < 0 && uprv_isInfinite(number));
397
398 #else
399 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
400 sizeof(uint32_t));
401 return((highBits & SIGN) && uprv_isInfinite(number));
402
403 #endif
404 }
405
406 U_CAPI double U_EXPORT2
uprv_getNaN()407 uprv_getNaN()
408 {
409 #if IEEE_754 || U_PLATFORM == U_PF_OS390
410 return gNan.d64;
411 #else
412 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
413 /* you'll need to replace this default implementation with what's correct*/
414 /* for your platform.*/
415 return 0.0;
416 #endif
417 }
418
419 U_CAPI double U_EXPORT2
uprv_getInfinity()420 uprv_getInfinity()
421 {
422 #if IEEE_754 || U_PLATFORM == U_PF_OS390
423 return gInf.d64;
424 #else
425 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
426 /* value, you'll need to replace this default implementation with what's*/
427 /* correct for your platform.*/
428 return 0.0;
429 #endif
430 }
431
432 U_CAPI double U_EXPORT2
uprv_floor(double x)433 uprv_floor(double x)
434 {
435 return floor(x);
436 }
437
438 U_CAPI double U_EXPORT2
uprv_ceil(double x)439 uprv_ceil(double x)
440 {
441 return ceil(x);
442 }
443
444 U_CAPI double U_EXPORT2
uprv_round(double x)445 uprv_round(double x)
446 {
447 return uprv_floor(x + 0.5);
448 }
449
450 U_CAPI double U_EXPORT2
uprv_fabs(double x)451 uprv_fabs(double x)
452 {
453 return fabs(x);
454 }
455
456 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)457 uprv_modf(double x, double* y)
458 {
459 return modf(x, y);
460 }
461
462 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)463 uprv_fmod(double x, double y)
464 {
465 return fmod(x, y);
466 }
467
468 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)469 uprv_pow(double x, double y)
470 {
471 /* This is declared as "double pow(double x, double y)" */
472 return pow(x, y);
473 }
474
475 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)476 uprv_pow10(int32_t x)
477 {
478 return pow(10.0, (double)x);
479 }
480
481 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)482 uprv_fmax(double x, double y)
483 {
484 #if IEEE_754
485 /* first handle NaN*/
486 if(uprv_isNaN(x) || uprv_isNaN(y))
487 return uprv_getNaN();
488
489 /* check for -0 and 0*/
490 if(x == 0.0 && y == 0.0 && u_signBit(x))
491 return y;
492
493 #endif
494
495 /* this should work for all flt point w/o NaN and Inf special cases */
496 return (x > y ? x : y);
497 }
498
499 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)500 uprv_fmin(double x, double y)
501 {
502 #if IEEE_754
503 /* first handle NaN*/
504 if(uprv_isNaN(x) || uprv_isNaN(y))
505 return uprv_getNaN();
506
507 /* check for -0 and 0*/
508 if(x == 0.0 && y == 0.0 && u_signBit(y))
509 return y;
510
511 #endif
512
513 /* this should work for all flt point w/o NaN and Inf special cases */
514 return (x > y ? y : x);
515 }
516
517 /**
518 * Truncates the given double.
519 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
520 * This is different than calling floor() or ceil():
521 * floor(3.3) = 3, floor(-3.3) = -4
522 * ceil(3.3) = 4, ceil(-3.3) = -3
523 */
524 U_CAPI double U_EXPORT2
uprv_trunc(double d)525 uprv_trunc(double d)
526 {
527 #if IEEE_754
528 /* handle error cases*/
529 if(uprv_isNaN(d))
530 return uprv_getNaN();
531 if(uprv_isInfinite(d))
532 return uprv_getInfinity();
533
534 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
535 return ceil(d);
536 else
537 return floor(d);
538
539 #else
540 return d >= 0 ? floor(d) : ceil(d);
541
542 #endif
543 }
544
545 /**
546 * Return the largest positive number that can be represented by an integer
547 * type of arbitrary bit length.
548 */
549 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)550 uprv_maxMantissa(void)
551 {
552 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
553 }
554
555 U_CAPI double U_EXPORT2
uprv_log(double d)556 uprv_log(double d)
557 {
558 return log(d);
559 }
560
561 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)562 uprv_maximumPtr(void * base)
563 {
564 #if U_PLATFORM == U_PF_OS400
565 /*
566 * With the provided function we should never be out of range of a given segment
567 * (a traditional/typical segment that is). Our segments have 5 bytes for the
568 * id and 3 bytes for the offset. The key is that the casting takes care of
569 * only retrieving the offset portion minus x1000. Hence, the smallest offset
570 * seen in a program is x001000 and when casted to an int would be 0.
571 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
572 *
573 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
574 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
575 * This function determines the activation based on the pointer that is passed in and
576 * calculates the appropriate maximum available size for
577 * each pointer type (TERASPACE and non-TERASPACE)
578 *
579 * Unlike other operating systems, the pointer model isn't determined at
580 * compile time on i5/OS.
581 */
582 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
583 /* if it is a TERASPACE pointer the max is 2GB - 4k */
584 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
585 }
586 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
587 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
588
589 #else
590 return U_MAX_PTR(base);
591 #endif
592 }
593
594 /*---------------------------------------------------------------------------
595 Platform-specific Implementations
596 Try these, and if they don't work on your platform, then special case your
597 platform with new implementations.
598 ---------------------------------------------------------------------------*/
599
600 /* Generic time zone layer -------------------------------------------------- */
601
602 /* Time zone utilities */
603 U_CAPI void U_EXPORT2
uprv_tzset()604 uprv_tzset()
605 {
606 #if defined(U_TZSET)
607 U_TZSET();
608 #else
609 /* no initialization*/
610 #endif
611 }
612
613 U_CAPI int32_t U_EXPORT2
uprv_timezone()614 uprv_timezone()
615 {
616 #ifdef U_TIMEZONE
617 return U_TIMEZONE;
618 #else
619 time_t t, t1, t2;
620 struct tm tmrec;
621 int32_t tdiff = 0;
622
623 time(&t);
624 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
625 #if U_PLATFORM != U_PF_IPHONE
626 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
627 #endif
628 t1 = mktime(&tmrec); /* local time in seconds*/
629 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
630 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
631 tdiff = t2 - t1;
632
633 #if U_PLATFORM != U_PF_IPHONE
634 /* imitate NT behaviour, which returns same timezone offset to GMT for
635 winter and summer.
636 This does not work on all platforms. For instance, on glibc on Linux
637 and on Mac OS 10.5, tdiff calculated above remains the same
638 regardless of whether DST is in effect or not. iOS is another
639 platform where this does not work. Linux + glibc and Mac OS 10.5
640 have U_TIMEZONE defined so that this code is not reached.
641 */
642 if (dst_checked)
643 tdiff += 3600;
644 #endif
645 return tdiff;
646 #endif
647 }
648
649 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
650 some platforms need to have it declared here. */
651
652 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
653 /* RS6000 and others reject char **tzname. */
654 extern U_IMPORT char *U_TZNAME[];
655 #endif
656
657 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
658 /* These platforms are likely to use Olson timezone IDs. */
659 #define CHECK_LOCALTIME_LINK 1
660 #if U_PLATFORM_IS_DARWIN_BASED
661 #include <tzfile.h>
662 #define TZZONEINFO (TZDIR "/")
663 #elif U_PLATFORM == U_PF_SOLARIS
664 #define TZDEFAULT "/etc/localtime"
665 #define TZZONEINFO "/usr/share/lib/zoneinfo/"
666 #define TZZONEINFO2 "../usr/share/lib/zoneinfo/"
667 #define TZ_ENV_CHECK "localtime"
668 #else
669 #define TZDEFAULT "/etc/localtime"
670 #define TZZONEINFO "/usr/share/zoneinfo/"
671 #endif
672 #if U_HAVE_DIRENT_H
673 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
674 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
675 symlinked to /etc/localtime, which makes searchForTZFile return
676 'localtime' when it's the first match. */
677 #define TZFILE_SKIP2 "localtime"
678 #define SEARCH_TZFILE
679 #include <dirent.h> /* Needed to search through system timezone files */
680 #endif
681 static char gTimeZoneBuffer[PATH_MAX];
682 static char *gTimeZoneBufferPtr = NULL;
683 #endif
684
685 #if !U_PLATFORM_USES_ONLY_WIN32_API
686 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)687 static UBool isValidOlsonID(const char *id) {
688 int32_t idx = 0;
689
690 /* Determine if this is something like Iceland (Olson ID)
691 or AST4ADT (non-Olson ID) */
692 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
693 idx++;
694 }
695
696 /* If we went through the whole string, then it might be okay.
697 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
698 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
699 The rest of the time it could be an Olson ID. George */
700 return (UBool)(id[idx] == 0
701 || uprv_strcmp(id, "PST8PDT") == 0
702 || uprv_strcmp(id, "MST7MDT") == 0
703 || uprv_strcmp(id, "CST6CDT") == 0
704 || uprv_strcmp(id, "EST5EDT") == 0);
705 }
706
707 /* On some Unix-like OS, 'posix' subdirectory in
708 /usr/share/zoneinfo replicates the top-level contents. 'right'
709 subdirectory has the same set of files, but individual files
710 are different from those in the top-level directory or 'posix'
711 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
712 has files for UTC.
713 When the first match for /etc/localtime is in either of them
714 (usually in posix because 'right' has different file contents),
715 or TZ environment variable points to one of them, createTimeZone
716 fails because, say, 'posix/America/New_York' is not an Olson
717 timezone id ('America/New_York' is). So, we have to skip
718 'posix/' and 'right/' at the beginning. */
skipZoneIDPrefix(const char ** id)719 static void skipZoneIDPrefix(const char** id) {
720 if (uprv_strncmp(*id, "posix/", 6) == 0
721 || uprv_strncmp(*id, "right/", 6) == 0)
722 {
723 *id += 6;
724 }
725 }
726 #endif
727
728 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
729
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)

/* One row of the table below: an offset, a daylight pattern and a pair of
   abbreviations, together with the Olson ID they map to. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* One of the U_DAYLIGHT_* values below. */
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;

/* Values for daylightType: no daylight time, daylight time in June, daylight time in December. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (U_DAYLIGHT_NONE) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, U_DAYLIGHT_DECEMBER, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, U_DAYLIGHT_JUNE, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, U_DAYLIGHT_DECEMBER, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, U_DAYLIGHT_JUNE, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, U_DAYLIGHT_JUNE, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, U_DAYLIGHT_DECEMBER, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, U_DAYLIGHT_DECEMBER, "EST", "EST", "Australia/Sydney"},
    {-36000, U_DAYLIGHT_JUNE, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, U_DAYLIGHT_JUNE, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, U_DAYLIGHT_DECEMBER, "CST", "CST", "Australia/South"},
    {-32400, U_DAYLIGHT_JUNE, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, U_DAYLIGHT_JUNE, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, U_DAYLIGHT_DECEMBER, "CWST", "CWST", "Australia/Eucla"},
    {-28800, U_DAYLIGHT_JUNE, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, U_DAYLIGHT_JUNE, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, U_DAYLIGHT_DECEMBER, "WST", "WST", "Australia/West"},
    {-25200, U_DAYLIGHT_JUNE, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, U_DAYLIGHT_JUNE, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, U_DAYLIGHT_JUNE, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, U_DAYLIGHT_JUNE, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, U_DAYLIGHT_JUNE, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, U_DAYLIGHT_JUNE, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, U_DAYLIGHT_JUNE, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, U_DAYLIGHT_JUNE, "AZT", "AZST", "Asia/Baku"},
    {-10800, U_DAYLIGHT_JUNE, "AST", "ADT", "Asia/Baghdad"},
    {-10800, U_DAYLIGHT_JUNE, "MSK", "MSD", "Europe/Moscow"},
    {-10800, U_DAYLIGHT_JUNE, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, U_DAYLIGHT_NONE, "EET", "CEST", "Africa/Tripoli"},
    {-7200, U_DAYLIGHT_JUNE, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, U_DAYLIGHT_JUNE, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, U_DAYLIGHT_NONE, "CET", "WEST", "Africa/Algiers"},
    {-3600, U_DAYLIGHT_DECEMBER, "WAT", "WAST", "Africa/Windhoek"},
    {0, U_DAYLIGHT_JUNE, "GMT", "IST", "Europe/Dublin"},
    {0, U_DAYLIGHT_JUNE, "GMT", "BST", "Europe/London"},
    {0, U_DAYLIGHT_NONE, "WET", "WEST", "Africa/Casablanca"},
    {0, U_DAYLIGHT_NONE, "WET", "WET", "Africa/El_Aaiun"},
    {3600, U_DAYLIGHT_JUNE, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, U_DAYLIGHT_JUNE, "EGT", "EGST", "America/Scoresbysund"},
    {10800, U_DAYLIGHT_JUNE, "PMST", "PMDT", "America/Miquelon"},
    {10800, U_DAYLIGHT_DECEMBER, "UYT", "UYST", "America/Montevideo"},
    {10800, U_DAYLIGHT_JUNE, "WGT", "WGST", "America/Godthab"},
    {10800, U_DAYLIGHT_DECEMBER, "BRT", "BRST", "Brazil/East"},
    {12600, U_DAYLIGHT_JUNE, "NST", "NDT", "America/St_Johns"},
    {14400, U_DAYLIGHT_JUNE, "AST", "ADT", "Canada/Atlantic"},
    {14400, U_DAYLIGHT_DECEMBER, "AMT", "AMST", "America/Cuiaba"},
    {14400, U_DAYLIGHT_DECEMBER, "CLT", "CLST", "Chile/Continental"},
    {14400, U_DAYLIGHT_DECEMBER, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, U_DAYLIGHT_DECEMBER, "PYT", "PYST", "America/Asuncion"},
    {18000, U_DAYLIGHT_JUNE, "CST", "CDT", "America/Havana"},
    {18000, U_DAYLIGHT_JUNE, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, U_DAYLIGHT_DECEMBER, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, U_DAYLIGHT_NONE, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, U_DAYLIGHT_NONE, "CST", "CDT", "America/Guatemala"},
    {21600, U_DAYLIGHT_JUNE, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, U_DAYLIGHT_JUNE, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, U_DAYLIGHT_NONE, "PST", "PST", "Pacific/Pitcairn"},
    {28800, U_DAYLIGHT_JUNE, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, U_DAYLIGHT_JUNE, "AKST", "AKDT", "US/Alaska"},
    {36000, U_DAYLIGHT_JUNE, "HAST", "HADT", "US/Aleutian"}
};
811
812 /*#define DEBUG_TZNAME*/
813
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)814 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
815 {
816 int32_t idx;
817 #ifdef DEBUG_TZNAME
818 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
819 #endif
820 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
821 {
822 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
823 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
824 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
825 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
826 {
827 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
828 }
829 }
830 return NULL;
831 }
832 #endif
833
834 #ifdef SEARCH_TZFILE
835 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
836 #define MAX_READ_SIZE 512
837
838 typedef struct DefaultTZInfo {
839 char* defaultTZBuffer;
840 int64_t defaultTZFileSize;
841 FILE* defaultTZFilePtr;
842 UBool defaultTZstatus;
843 int32_t defaultTZPosition;
844 } DefaultTZInfo;
845
846 /*
847 * This method compares the two files given to see if they are a match.
848 * It is currently use to compare two TZ files.
849 */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)850 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
851 FILE* file;
852 int64_t sizeFile;
853 int64_t sizeFileLeft;
854 int32_t sizeFileRead;
855 int32_t sizeFileToRead;
856 char bufferFile[MAX_READ_SIZE];
857 UBool result = TRUE;
858
859 if (tzInfo->defaultTZFilePtr == NULL) {
860 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
861 }
862 file = fopen(TZFileName, "r");
863
864 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
865
866 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
867 /* First check that the file size are equal. */
868 if (tzInfo->defaultTZFileSize == 0) {
869 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
870 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
871 }
872 fseek(file, 0, SEEK_END);
873 sizeFile = ftell(file);
874 sizeFileLeft = sizeFile;
875
876 if (sizeFile != tzInfo->defaultTZFileSize) {
877 result = FALSE;
878 } else {
879 /* Store the data from the files in seperate buffers and
880 * compare each byte to determine equality.
881 */
882 if (tzInfo->defaultTZBuffer == NULL) {
883 rewind(tzInfo->defaultTZFilePtr);
884 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
885 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
886 }
887 rewind(file);
888 while(sizeFileLeft > 0) {
889 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
890 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
891
892 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
893 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
894 result = FALSE;
895 break;
896 }
897 sizeFileLeft -= sizeFileRead;
898 tzInfo->defaultTZPosition += sizeFileRead;
899 }
900 }
901 } else {
902 result = FALSE;
903 }
904
905 if (file != NULL) {
906 fclose(file);
907 }
908
909 return result;
910 }
911 /*
912 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
913 */
914 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
915 #define SKIP1 "."
916 #define SKIP2 ".."
917 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)918 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
919 char curpath[MAX_PATH_SIZE];
920 DIR* dirp = opendir(path);
921 DIR* subDirp = NULL;
922 struct dirent* dirEntry = NULL;
923
924 char* result = NULL;
925 if (dirp == NULL) {
926 return result;
927 }
928
929 /* Save the current path */
930 uprv_memset(curpath, 0, MAX_PATH_SIZE);
931 uprv_strcpy(curpath, path);
932
933 /* Check each entry in the directory. */
934 while((dirEntry = readdir(dirp)) != NULL) {
935 const char* dirName = dirEntry->d_name;
936 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
937 /* Create a newpath with the new entry to test each entry in the directory. */
938 char newpath[MAX_PATH_SIZE];
939 uprv_strcpy(newpath, curpath);
940 uprv_strcat(newpath, dirName);
941
942 if ((subDirp = opendir(newpath)) != NULL) {
943 /* If this new path is a directory, make a recursive call with the newpath. */
944 closedir(subDirp);
945 uprv_strcat(newpath, "/");
946 result = searchForTZFile(newpath, tzInfo);
947 /*
948 Have to get out here. Otherwise, we'd keep looking
949 and return the first match in the top-level directory
950 if there's a match in the top-level. If not, this function
951 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
952 It worked without this in most cases because we have a fallback of calling
953 localtime_r to figure out the default timezone.
954 */
955 if (result != NULL)
956 break;
957 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
958 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
959 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
960 skipZoneIDPrefix(&zoneid);
961 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
962 result = SEARCH_TZFILE_RESULT;
963 /* Get out after the first one found. */
964 break;
965 }
966 }
967 }
968 }
969 closedir(dirp);
970 return result;
971 }
972 #endif
973 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)974 uprv_tzname(int n)
975 {
976 const char *tzid = NULL;
977 #if U_PLATFORM_USES_ONLY_WIN32_API
978 tzid = uprv_detectWindowsTimeZone();
979
980 if (tzid != NULL) {
981 return tzid;
982 }
983 #else
984
985 /*#if U_PLATFORM_IS_DARWIN_BASED
986 int ret;
987
988 tzid = getenv("TZFILE");
989 if (tzid != NULL) {
990 return tzid;
991 }
992 #endif*/
993
994 /* This code can be temporarily disabled to test tzname resolution later on. */
995 #ifndef DEBUG_TZNAME
996 tzid = getenv("TZ");
997 if (tzid != NULL && isValidOlsonID(tzid)
998 #if U_PLATFORM == U_PF_SOLARIS
999 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1000 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1001 #endif
1002 ) {
1003 /* This might be a good Olson ID. */
1004 skipZoneIDPrefix(&tzid);
1005 return tzid;
1006 }
1007 /* else U_TZNAME will give a better result. */
1008 #endif
1009
1010 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1011 /* Caller must handle threading issues */
1012 if (gTimeZoneBufferPtr == NULL) {
1013 /*
1014 This is a trick to look at the name of the link to get the Olson ID
1015 because the tzfile contents is underspecified.
1016 This isn't guaranteed to work because it may not be a symlink.
1017 */
1018 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1019 if (0 < ret) {
1020 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1021 gTimeZoneBuffer[ret] = 0;
1022 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1023 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1024 {
1025 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1026 }
1027 #if U_PLATFORM == U_PF_SOLARIS
1028 else
1029 {
1030 tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
1031 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
1032 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1033 {
1034 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1035 }
1036 }
1037 #endif
1038 } else {
1039 #if defined(SEARCH_TZFILE)
1040 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1041 if (tzInfo != NULL) {
1042 tzInfo->defaultTZBuffer = NULL;
1043 tzInfo->defaultTZFileSize = 0;
1044 tzInfo->defaultTZFilePtr = NULL;
1045 tzInfo->defaultTZstatus = FALSE;
1046 tzInfo->defaultTZPosition = 0;
1047
1048 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1049
1050 /* Free previously allocated memory */
1051 if (tzInfo->defaultTZBuffer != NULL) {
1052 uprv_free(tzInfo->defaultTZBuffer);
1053 }
1054 if (tzInfo->defaultTZFilePtr != NULL) {
1055 fclose(tzInfo->defaultTZFilePtr);
1056 }
1057 uprv_free(tzInfo);
1058 }
1059
1060 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1061 return gTimeZoneBufferPtr;
1062 }
1063 #endif
1064 }
1065 }
1066 else {
1067 return gTimeZoneBufferPtr;
1068 }
1069 #endif
1070 #endif
1071
1072 #ifdef U_TZNAME
1073 #if U_PLATFORM_USES_ONLY_WIN32_API
1074 /* The return value is free'd in timezone.cpp on Windows because
1075 * the other code path returns a pointer to a heap location. */
1076 return uprv_strdup(U_TZNAME[n]);
1077 #else
1078 /*
1079 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1080 So we remap the abbreviation to an olson ID.
1081
1082 Since Windows exposes a little more timezone information,
1083 we normally don't use this code on Windows because
1084 uprv_detectWindowsTimeZone should have already given the correct answer.
1085 */
1086 {
1087 struct tm juneSol, decemberSol;
1088 int daylightType;
1089 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1090 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1091
1092 /* This probing will tell us when daylight savings occurs. */
1093 localtime_r(&juneSolstice, &juneSol);
1094 localtime_r(&decemberSolstice, &decemberSol);
1095 if(decemberSol.tm_isdst > 0) {
1096 daylightType = U_DAYLIGHT_DECEMBER;
1097 } else if(juneSol.tm_isdst > 0) {
1098 daylightType = U_DAYLIGHT_JUNE;
1099 } else {
1100 daylightType = U_DAYLIGHT_NONE;
1101 }
1102 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1103 if (tzid != NULL) {
1104 return tzid;
1105 }
1106 }
1107 return U_TZNAME[n];
1108 #endif
1109 #else
1110 return "";
1111 #endif
1112 }
1113
1114 /* Get and set the ICU data directory --------------------------------------- */
1115
1116 static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
1117 static char *gDataDirectory = NULL;
1118
1119 UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1120 static CharString *gTimeZoneFilesDirectory = NULL;
1121
1122 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1123 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1124 #endif
1125
putil_cleanup(void)1126 static UBool U_CALLCONV putil_cleanup(void)
1127 {
1128 if (gDataDirectory && *gDataDirectory) {
1129 uprv_free(gDataDirectory);
1130 }
1131 gDataDirectory = NULL;
1132 gDataDirInitOnce.reset();
1133
1134 delete gTimeZoneFilesDirectory;
1135 gTimeZoneFilesDirectory = NULL;
1136 gTimeZoneFilesInitOnce.reset();
1137
1138 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1139 if (gCorrectedPOSIXLocale) {
1140 uprv_free(gCorrectedPOSIXLocale);
1141 gCorrectedPOSIXLocale = NULL;
1142 }
1143 #endif
1144 return TRUE;
1145 }
1146
1147 /*
1148 * Set the data directory.
1149 * Make a copy of the passed string, and set the global data dir to point to it.
1150 */
1151 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1152 u_setDataDirectory(const char *directory) {
1153 char *newDataDir;
1154 int32_t length;
1155
1156 if(directory==NULL || *directory==0) {
1157 /* A small optimization to prevent the malloc and copy when the
1158 shared library is used, and this is a way to make sure that NULL
1159 is never returned.
1160 */
1161 newDataDir = (char *)"";
1162 }
1163 else {
1164 length=(int32_t)uprv_strlen(directory);
1165 newDataDir = (char *)uprv_malloc(length + 2);
1166 /* Exit out if newDataDir could not be created. */
1167 if (newDataDir == NULL) {
1168 return;
1169 }
1170 uprv_strcpy(newDataDir, directory);
1171
1172 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1173 {
1174 char *p;
1175 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1176 *p = U_FILE_SEP_CHAR;
1177 }
1178 }
1179 #endif
1180 }
1181
1182 if (gDataDirectory && *gDataDirectory) {
1183 uprv_free(gDataDirectory);
1184 }
1185 gDataDirectory = newDataDir;
1186 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1187 }
1188
1189 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1190 uprv_pathIsAbsolute(const char *path)
1191 {
1192 if(!path || !*path) {
1193 return FALSE;
1194 }
1195
1196 if(*path == U_FILE_SEP_CHAR) {
1197 return TRUE;
1198 }
1199
1200 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1201 if(*path == U_FILE_ALT_SEP_CHAR) {
1202 return TRUE;
1203 }
1204 #endif
1205
1206 #if U_PLATFORM_USES_ONLY_WIN32_API
1207 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1208 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1209 path[1] == ':' ) {
1210 return TRUE;
1211 }
1212 #endif
1213
1214 return FALSE;
1215 }
1216
1217 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1218 until some client wrapper makefiles are updated */
1219 #if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1220 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1221 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1222 # endif
1223 #endif
1224
dataDirectoryInitFn()1225 static void U_CALLCONV dataDirectoryInitFn() {
1226 /* If we already have the directory, then return immediately. Will happen if user called
1227 * u_setDataDirectory().
1228 */
1229 if (gDataDirectory) {
1230 return;
1231 }
1232
1233 const char *path = NULL;
1234 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1235 char datadir_path_buffer[PATH_MAX];
1236 #endif
1237
1238 /*
1239 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1240 override ICU's data with the ICU_DATA environment variable. This prevents
1241 problems where multiple custom copies of ICU's specific version of data
1242 are installed on a system. Either the application must define the data
1243 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1244 ICU, set the data with udata_setCommonData or trust that all of the
1245 required data is contained in ICU's data library that contains
1246 the entry point defined by U_ICUDATA_ENTRY_POINT.
1247
1248 There may also be some platforms where environment variables
1249 are not allowed.
1250 */
1251 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1252 /* First try to get the environment variable */
1253 path=getenv("ICU_DATA");
1254 # endif
1255
1256 /* ICU_DATA_DIR may be set as a compile option.
1257 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1258 * and is used only when data is built in archive mode eliminating the need
1259 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1260 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1261 * set their own path.
1262 */
1263 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1264 if(path==NULL || *path==0) {
1265 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1266 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1267 # endif
1268 # ifdef ICU_DATA_DIR
1269 path=ICU_DATA_DIR;
1270 # else
1271 path=U_ICU_DATA_DEFAULT_DIR;
1272 # endif
1273 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1274 if (prefix != NULL) {
1275 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1276 path=datadir_path_buffer;
1277 }
1278 # endif
1279 }
1280 #endif
1281
1282 if(path==NULL) {
1283 /* It looks really bad, set it to something. */
1284 path = "";
1285 }
1286
1287 u_setDataDirectory(path);
1288 return;
1289 }
1290
1291 U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)1292 u_getDataDirectory(void) {
1293 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1294 return gDataDirectory;
1295 }
1296
setTimeZoneFilesDir(const char * path,UErrorCode & status)1297 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1298 if (U_FAILURE(status)) {
1299 return;
1300 }
1301 gTimeZoneFilesDirectory->clear();
1302 gTimeZoneFilesDirectory->append(path, status);
1303 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1304 char *p = gTimeZoneFilesDirectory->data();
1305 while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) {
1306 *p = U_FILE_SEP_CHAR;
1307 }
1308 #endif
1309 }
1310
1311 #define TO_STRING(x) TO_STRING_2(x)
1312 #define TO_STRING_2(x) #x
1313
TimeZoneDataDirInitFn(UErrorCode & status)1314 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1315 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1316 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1317 gTimeZoneFilesDirectory = new CharString();
1318 if (gTimeZoneFilesDirectory == NULL) {
1319 status = U_MEMORY_ALLOCATION_ERROR;
1320 return;
1321 }
1322 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
1323 #if defined(U_TIMEZONE_FILES_DIR)
1324 if (dir == NULL) {
1325 dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1326 }
1327 #endif
1328 if (dir == NULL) {
1329 dir = "";
1330 }
1331 setTimeZoneFilesDir(dir, status);
1332 }
1333
1334
1335 U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode * status)1336 u_getTimeZoneFilesDirectory(UErrorCode *status) {
1337 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1338 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1339 }
1340
1341 U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char * path,UErrorCode * status)1342 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1343 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1344 setTimeZoneFilesDir(path, *status);
1345
1346 // Note: this function does some extra churn, first setting based on the
1347 // environment, then immediately replacing with the value passed in.
1348 // The logic is simpler that way, and performance shouldn't be an issue.
1349 }
1350
1351
1352 #if U_POSIX_LOCALE
1353 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1354 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1355 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1356 */
uprv_getPOSIXIDForCategory(int category)1357 static const char *uprv_getPOSIXIDForCategory(int category)
1358 {
1359 const char* posixID = NULL;
1360 if (category == LC_MESSAGES || category == LC_CTYPE) {
1361 /*
1362 * On Solaris two different calls to setlocale can result in
1363 * different values. Only get this value once.
1364 *
1365 * We must check this first because an application can set this.
1366 *
1367 * LC_ALL can't be used because it's platform dependent. The LANG
1368 * environment variable seems to affect LC_CTYPE variable by default.
1369 * Here is what setlocale(LC_ALL, NULL) can return.
1370 * HPUX can return 'C C C C C C C'
1371 * Solaris can return /en_US/C/C/C/C/C on the second try.
1372 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1373 *
1374 * The default codepage detection also needs to use LC_CTYPE.
1375 *
1376 * Do not call setlocale(LC_*, "")! Using an empty string instead
1377 * of NULL, will modify the libc behavior.
1378 */
1379 posixID = setlocale(category, NULL);
1380 if ((posixID == 0)
1381 || (uprv_strcmp("C", posixID) == 0)
1382 || (uprv_strcmp("POSIX", posixID) == 0))
1383 {
1384 /* Maybe we got some garbage. Try something more reasonable */
1385 posixID = getenv("LC_ALL");
1386 if (posixID == 0) {
1387 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1388 if (posixID == 0) {
1389 posixID = getenv("LANG");
1390 }
1391 }
1392 }
1393 }
1394 if ((posixID==0)
1395 || (uprv_strcmp("C", posixID) == 0)
1396 || (uprv_strcmp("POSIX", posixID) == 0))
1397 {
1398 /* Nothing worked. Give it a nice POSIX default value. */
1399 posixID = "en_US_POSIX";
1400 }
1401 return posixID;
1402 }
1403
1404 /* Return just the POSIX id for the default locale, whatever happens to be in
1405 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1406 */
uprv_getPOSIXIDForDefaultLocale(void)1407 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1408 {
1409 static const char* posixID = NULL;
1410 if (posixID == 0) {
1411 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1412 }
1413 return posixID;
1414 }
1415
1416 #if !U_CHARSET_IS_UTF8
1417 /* Return just the POSIX id for the default codepage, whatever happens to be in
1418 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1419 */
uprv_getPOSIXIDForDefaultCodepage(void)1420 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1421 {
1422 static const char* posixID = NULL;
1423 if (posixID == 0) {
1424 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1425 }
1426 return posixID;
1427 }
1428 #endif
1429 #endif
1430
1431 /* NOTE: The caller should handle thread safety */
1432 U_CAPI const char* U_EXPORT2
uprv_getDefaultLocaleID()1433 uprv_getDefaultLocaleID()
1434 {
1435 #if U_POSIX_LOCALE
1436 /*
1437 Note that: (a '!' means the ID is improper somehow)
1438 LC_ALL ----> default_loc codepage
1439 --------------------------------------------------------
1440 ab.CD ab CD
1441 ab@CD ab__CD -
1442 ab@CD.EF ab__CD EF
1443
1444 ab_CD.EF@GH ab_CD_GH EF
1445
1446 Some 'improper' ways to do the same as above:
1447 ! ab_CD@GH.EF ab_CD_GH EF
1448 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1449 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1450
1451 _CD@GH _CD_GH -
1452 _CD.EF@GH _CD_GH EF
1453
1454 The variant cannot have dots in it.
1455 The 'rightmost' variant (@xxx) wins.
1456 The leftmost codepage (.xxx) wins.
1457 */
1458 char *correctedPOSIXLocale = 0;
1459 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1460 const char *p;
1461 const char *q;
1462 int32_t len;
1463
1464 /* Format: (no spaces)
1465 ll [ _CC ] [ . MM ] [ @ VV]
1466
1467 l = lang, C = ctry, M = charmap, V = variant
1468 */
1469
1470 if (gCorrectedPOSIXLocale != NULL) {
1471 return gCorrectedPOSIXLocale;
1472 }
1473
1474 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1475 /* assume new locale can't be larger than old one? */
1476 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1477 /* Exit on memory allocation error. */
1478 if (correctedPOSIXLocale == NULL) {
1479 return NULL;
1480 }
1481 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1482 correctedPOSIXLocale[p-posixID] = 0;
1483
1484 /* do not copy after the @ */
1485 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1486 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1487 }
1488 }
1489
1490 /* Note that we scan the *uncorrected* ID. */
1491 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1492 if (correctedPOSIXLocale == NULL) {
1493 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1494 /* Exit on memory allocation error. */
1495 if (correctedPOSIXLocale == NULL) {
1496 return NULL;
1497 }
1498 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1499 correctedPOSIXLocale[p-posixID] = 0;
1500 }
1501 p++;
1502
1503 /* Take care of any special cases here.. */
1504 if (!uprv_strcmp(p, "nynorsk")) {
1505 p = "NY";
1506 /* Don't worry about no__NY. In practice, it won't appear. */
1507 }
1508
1509 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1510 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1511 }
1512 else {
1513 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1514 }
1515
1516 if ((q = uprv_strchr(p, '.')) != NULL) {
1517 /* How big will the resulting string be? */
1518 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1519 uprv_strncat(correctedPOSIXLocale, p, q-p);
1520 correctedPOSIXLocale[len] = 0;
1521 }
1522 else {
1523 /* Anything following the @ sign */
1524 uprv_strcat(correctedPOSIXLocale, p);
1525 }
1526
1527 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1528 * How about 'russian' -> 'ru'?
1529 * Many of the other locales using ISO codes will be handled by the
1530 * canonicalization functions in uloc_getDefault.
1531 */
1532 }
1533
1534 /* Was a correction made? */
1535 if (correctedPOSIXLocale != NULL) {
1536 posixID = correctedPOSIXLocale;
1537 }
1538 else {
1539 /* copy it, just in case the original pointer goes away. See j2395 */
1540 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1541 /* Exit on memory allocation error. */
1542 if (correctedPOSIXLocale == NULL) {
1543 return NULL;
1544 }
1545 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1546 }
1547
1548 if (gCorrectedPOSIXLocale == NULL) {
1549 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1550 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1551 correctedPOSIXLocale = NULL;
1552 }
1553
1554 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1555 uprv_free(correctedPOSIXLocale);
1556 }
1557
1558 return posixID;
1559
1560 #elif U_PLATFORM_USES_ONLY_WIN32_API
1561 #define POSIX_LOCALE_CAPACITY 64
1562 UErrorCode status = U_ZERO_ERROR;
1563 char *correctedPOSIXLocale = 0;
1564
1565 if (gCorrectedPOSIXLocale != NULL) {
1566 return gCorrectedPOSIXLocale;
1567 }
1568
1569 LCID id = GetThreadLocale();
1570 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1571 if (correctedPOSIXLocale) {
1572 int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1573 if (U_SUCCESS(status)) {
1574 *(correctedPOSIXLocale + posixLen) = 0;
1575 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1576 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1577 } else {
1578 uprv_free(correctedPOSIXLocale);
1579 }
1580 }
1581
1582 if (gCorrectedPOSIXLocale == NULL) {
1583 return "en_US";
1584 }
1585 return gCorrectedPOSIXLocale;
1586
1587 #elif U_PLATFORM == U_PF_OS400
1588 /* locales are process scoped and are by definition thread safe */
1589 static char correctedLocale[64];
1590 const char *localeID = getenv("LC_ALL");
1591 char *p;
1592
1593 if (localeID == NULL)
1594 localeID = getenv("LANG");
1595 if (localeID == NULL)
1596 localeID = setlocale(LC_ALL, NULL);
1597 /* Make sure we have something... */
1598 if (localeID == NULL)
1599 return "en_US_POSIX";
1600
1601 /* Extract the locale name from the path. */
1602 if((p = uprv_strrchr(localeID, '/')) != NULL)
1603 {
1604 /* Increment p to start of locale name. */
1605 p++;
1606 localeID = p;
1607 }
1608
1609 /* Copy to work location. */
1610 uprv_strcpy(correctedLocale, localeID);
1611
1612 /* Strip off the '.locale' extension. */
1613 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1614 *p = 0;
1615 }
1616
1617 /* Upper case the locale name. */
1618 T_CString_toUpperCase(correctedLocale);
1619
1620 /* See if we are using the POSIX locale. Any of the
1621 * following are equivalent and use the same QLGPGCMA
1622 * (POSIX) locale.
1623 * QLGPGCMA2 means UCS2
1624 * QLGPGCMA_4 means UTF-32
1625 * QLGPGCMA_8 means UTF-8
1626 */
1627 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1628 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1629 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1630 {
1631 uprv_strcpy(correctedLocale, "en_US_POSIX");
1632 }
1633 else
1634 {
1635 int16_t LocaleLen;
1636
1637 /* Lower case the lang portion. */
1638 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1639 {
1640 *p = uprv_tolower(*p);
1641 }
1642
1643 /* Adjust for Euro. After '_E' add 'URO'. */
1644 LocaleLen = uprv_strlen(correctedLocale);
1645 if (correctedLocale[LocaleLen - 2] == '_' &&
1646 correctedLocale[LocaleLen - 1] == 'E')
1647 {
1648 uprv_strcat(correctedLocale, "URO");
1649 }
1650
1651 /* If using Lotus-based locale then convert to
1652 * equivalent non Lotus.
1653 */
1654 else if (correctedLocale[LocaleLen - 2] == '_' &&
1655 correctedLocale[LocaleLen - 1] == 'L')
1656 {
1657 correctedLocale[LocaleLen - 2] = 0;
1658 }
1659
1660 /* There are separate simplified and traditional
1661 * locales called zh_HK_S and zh_HK_T.
1662 */
1663 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1664 {
1665 uprv_strcpy(correctedLocale, "zh_HK");
1666 }
1667
1668 /* A special zh_CN_GBK locale...
1669 */
1670 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1671 {
1672 uprv_strcpy(correctedLocale, "zh_CN");
1673 }
1674
1675 }
1676
1677 return correctedLocale;
1678 #endif
1679
1680 }
1681
1682 #if !U_CHARSET_IS_UTF8
1683 #if U_POSIX_LOCALE
1684 /*
1685 Due to various platform differences, one platform may specify a charset,
1686 when they really mean a different charset. Remap the names so that they are
1687 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1688 here. Before adding anything to this function, please consider adding unique
1689 names to the ICU alias table in the data directory.
1690 */
1691 static const char*
remapPlatformDependentCodepage(const char * locale,const char * name)1692 remapPlatformDependentCodepage(const char *locale, const char *name) {
1693 if (locale != NULL && *locale == 0) {
1694 /* Make sure that an empty locale is handled the same way. */
1695 locale = NULL;
1696 }
1697 if (name == NULL) {
1698 return NULL;
1699 }
1700 #if U_PLATFORM == U_PF_AIX
1701 if (uprv_strcmp(name, "IBM-943") == 0) {
1702 /* Use the ASCII compatible ibm-943 */
1703 name = "Shift-JIS";
1704 }
1705 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1706 /* Use the windows-1252 that contains the Euro */
1707 name = "IBM-5348";
1708 }
1709 #elif U_PLATFORM == U_PF_SOLARIS
1710 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1711 /* Solaris underspecifies the "EUC" name. */
1712 if (uprv_strcmp(locale, "zh_CN") == 0) {
1713 name = "EUC-CN";
1714 }
1715 else if (uprv_strcmp(locale, "zh_TW") == 0) {
1716 name = "EUC-TW";
1717 }
1718 else if (uprv_strcmp(locale, "ko_KR") == 0) {
1719 name = "EUC-KR";
1720 }
1721 }
1722 else if (uprv_strcmp(name, "eucJP") == 0) {
1723 /*
1724 ibm-954 is the best match.
1725 ibm-33722 is the default for eucJP (similar to Windows).
1726 */
1727 name = "eucjis";
1728 }
1729 else if (uprv_strcmp(name, "646") == 0) {
1730 /*
1731 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1732 * ISO-8859-1 instead of US-ASCII(646).
1733 */
1734 name = "ISO-8859-1";
1735 }
1736 #elif U_PLATFORM_IS_DARWIN_BASED
1737 if (locale == NULL && *name == 0) {
1738 /*
1739 No locale was specified, and an empty name was passed in.
1740 This usually indicates that nl_langinfo didn't return valid information.
1741 Mac OS X uses UTF-8 by default (especially the locale data and console).
1742 */
1743 name = "UTF-8";
1744 }
1745 else if (uprv_strcmp(name, "CP949") == 0) {
1746 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1747 name = "EUC-KR";
1748 }
1749 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1750 /*
1751 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1752 */
1753 name = "UTF-8";
1754 }
1755 #elif U_PLATFORM == U_PF_BSD
1756 if (uprv_strcmp(name, "CP949") == 0) {
1757 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1758 name = "EUC-KR";
1759 }
1760 #elif U_PLATFORM == U_PF_HPUX
1761 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1762 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1763 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1764 name = "hkbig5";
1765 }
1766 else if (uprv_strcmp(name, "eucJP") == 0) {
1767 /*
1768 ibm-1350 is the best match, but unavailable.
1769 ibm-954 is mostly a superset of ibm-1350.
1770 ibm-33722 is the default for eucJP (similar to Windows).
1771 */
1772 name = "eucjis";
1773 }
1774 #elif U_PLATFORM == U_PF_LINUX
1775 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1776 /* Linux underspecifies the "EUC" name. */
1777 if (uprv_strcmp(locale, "korean") == 0) {
1778 name = "EUC-KR";
1779 }
1780 else if (uprv_strcmp(locale, "japanese") == 0) {
1781 /* See comment below about eucJP */
1782 name = "eucjis";
1783 }
1784 }
1785 else if (uprv_strcmp(name, "eucjp") == 0) {
1786 /*
1787 ibm-1350 is the best match, but unavailable.
1788 ibm-954 is mostly a superset of ibm-1350.
1789 ibm-33722 is the default for eucJP (similar to Windows).
1790 */
1791 name = "eucjis";
1792 }
1793 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1794 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1795 /*
1796 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1797 */
1798 name = "UTF-8";
1799 }
1800 /*
1801 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1802 * it by falling back to 'US-ASCII' when NULL is returned from this
1803 * function. So, we don't have to worry about it here.
1804 */
1805 #endif
1806 /* return NULL when "" is passed in */
1807 if (*name == 0) {
1808 name = NULL;
1809 }
1810 return name;
1811 }
1812
1813 static const char*
getCodepageFromPOSIXID(const char * localeName,char * buffer,int32_t buffCapacity)1814 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1815 {
1816 char localeBuf[100];
1817 const char *name = NULL;
1818 char *variant = NULL;
1819
1820 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1821 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1822 uprv_strncpy(localeBuf, localeName, localeCapacity);
1823 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1824 name = uprv_strncpy(buffer, name+1, buffCapacity);
1825 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1826 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1827 *variant = 0;
1828 }
1829 name = remapPlatformDependentCodepage(localeBuf, name);
1830 }
1831 return name;
1832 }
1833 #endif
1834
1835 static const char*
int_getDefaultCodepage()1836 int_getDefaultCodepage()
1837 {
1838 #if U_PLATFORM == U_PF_OS400
1839 uint32_t ccsid = 37; /* Default to ibm-37 */
1840 static char codepage[64];
1841 Qwc_JOBI0400_t jobinfo;
1842 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1843
1844 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1845 "* ", " ", &error);
1846
1847 if (error.Bytes_Available == 0) {
1848 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1849 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1850 }
1851 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1852 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1853 }
1854 /* else use the default */
1855 }
1856 sprintf(codepage,"ibm-%d", ccsid);
1857 return codepage;
1858
1859 #elif U_PLATFORM == U_PF_OS390
1860 static char codepage[64];
1861
1862 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1863 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1864 codepage[63] = 0; /* NULL terminate */
1865
1866 return codepage;
1867
1868 #elif U_PLATFORM_USES_ONLY_WIN32_API
1869 static char codepage[64];
1870 sprintf(codepage, "windows-%d", GetACP());
1871 return codepage;
1872
1873 #elif U_POSIX_LOCALE
1874 static char codesetName[100];
1875 const char *localeName = NULL;
1876 const char *name = NULL;
1877
1878 localeName = uprv_getPOSIXIDForDefaultCodepage();
1879 uprv_memset(codesetName, 0, sizeof(codesetName));
1880 #if U_HAVE_NL_LANGINFO_CODESET
1881 /* When available, check nl_langinfo first because it usually gives more
1882 useful names. It depends on LC_CTYPE.
1883 nl_langinfo may use the same buffer as setlocale. */
1884 {
1885 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1886 #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1887 /*
1888 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1889 * instead of ASCII.
1890 */
1891 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1892 codeset = remapPlatformDependentCodepage(localeName, codeset);
1893 } else
1894 #endif
1895 {
1896 codeset = remapPlatformDependentCodepage(NULL, codeset);
1897 }
1898
1899 if (codeset != NULL) {
1900 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1901 codesetName[sizeof(codesetName)-1] = 0;
1902 return codesetName;
1903 }
1904 }
1905 #endif
1906
1907 /* Use setlocale in a nice way, and then check some environment variables.
1908 Maybe the application used setlocale already.
1909 */
1910 uprv_memset(codesetName, 0, sizeof(codesetName));
1911 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
1912 if (name) {
1913 /* if we can find the codeset name from setlocale, return that. */
1914 return name;
1915 }
1916
1917 if (*codesetName == 0)
1918 {
1919 /* Everything failed. Return US ASCII (ISO 646). */
1920 (void)uprv_strcpy(codesetName, "US-ASCII");
1921 }
1922 return codesetName;
1923 #else
1924 return "US-ASCII";
1925 #endif
1926 }
1927
1928
1929 U_CAPI const char* U_EXPORT2
uprv_getDefaultCodepage()1930 uprv_getDefaultCodepage()
1931 {
1932 static char const *name = NULL;
1933 umtx_lock(NULL);
1934 if (name == NULL) {
1935 name = int_getDefaultCodepage();
1936 }
1937 umtx_unlock(NULL);
1938 return name;
1939 }
1940 #endif /* !U_CHARSET_IS_UTF8 */
1941
1942
1943 /* end of platform-specific implementation -------------- */
1944
1945 /* version handling --------------------------------------------------------- */
1946
1947 U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray,const char * versionString)1948 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1949 char *end;
1950 uint16_t part=0;
1951
1952 if(versionArray==NULL) {
1953 return;
1954 }
1955
1956 if(versionString!=NULL) {
1957 for(;;) {
1958 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1959 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1960 break;
1961 }
1962 versionString=end+1;
1963 }
1964 }
1965
1966 while(part<U_MAX_VERSION_LENGTH) {
1967 versionArray[part++]=0;
1968 }
1969 }
1970
1971 U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray,const UChar * versionString)1972 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
1973 if(versionArray!=NULL && versionString!=NULL) {
1974 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
1975 int32_t len = u_strlen(versionString);
1976 if(len>U_MAX_VERSION_STRING_LENGTH) {
1977 len = U_MAX_VERSION_STRING_LENGTH;
1978 }
1979 u_UCharsToChars(versionString, versionChars, len);
1980 versionChars[len]=0;
1981 u_versionFromString(versionArray, versionChars);
1982 }
1983 }
1984
1985 U_CAPI void U_EXPORT2
u_versionToString(const UVersionInfo versionArray,char * versionString)1986 u_versionToString(const UVersionInfo versionArray, char *versionString) {
1987 uint16_t count, part;
1988 uint8_t field;
1989
1990 if(versionString==NULL) {
1991 return;
1992 }
1993
1994 if(versionArray==NULL) {
1995 versionString[0]=0;
1996 return;
1997 }
1998
1999 /* count how many fields need to be written */
2000 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2001 }
2002
2003 if(count <= 1) {
2004 count = 2;
2005 }
2006
2007 /* write the first part */
2008 /* write the decimal field value */
2009 field=versionArray[0];
2010 if(field>=100) {
2011 *versionString++=(char)('0'+field/100);
2012 field%=100;
2013 }
2014 if(field>=10) {
2015 *versionString++=(char)('0'+field/10);
2016 field%=10;
2017 }
2018 *versionString++=(char)('0'+field);
2019
2020 /* write the following parts */
2021 for(part=1; part<count; ++part) {
2022 /* write a dot first */
2023 *versionString++=U_VERSION_DELIMITER;
2024
2025 /* write the decimal field value */
2026 field=versionArray[part];
2027 if(field>=100) {
2028 *versionString++=(char)('0'+field/100);
2029 field%=100;
2030 }
2031 if(field>=10) {
2032 *versionString++=(char)('0'+field/10);
2033 field%=10;
2034 }
2035 *versionString++=(char)('0'+field);
2036 }
2037
2038 /* NUL-terminate */
2039 *versionString=0;
2040 }
2041
2042 U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray)2043 u_getVersion(UVersionInfo versionArray) {
2044 (void)copyright; // Suppress unused variable warning from clang.
2045 u_versionFromString(versionArray, U_ICU_VERSION);
2046 }
2047
2048 /**
2049 * icucfg.h dependent code
2050 */
2051
2052 #if U_ENABLE_DYLOAD
2053
2054 #if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2055
2056 #if HAVE_DLFCN_H
2057
2058 #ifdef __MVS__
2059 #ifndef __SUSV3
2060 #define __SUSV3 1
2061 #endif
2062 #endif
2063 #include <dlfcn.h>
2064 #endif
2065
2066 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2067 uprv_dl_open(const char *libName, UErrorCode *status) {
2068 void *ret = NULL;
2069 if(U_FAILURE(*status)) return ret;
2070 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2071 if(ret==NULL) {
2072 #ifdef U_TRACE_DYLOAD
2073 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2074 #endif
2075 *status = U_MISSING_RESOURCE_ERROR;
2076 }
2077 return ret;
2078 }
2079
2080 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2081 uprv_dl_close(void *lib, UErrorCode *status) {
2082 if(U_FAILURE(*status)) return;
2083 dlclose(lib);
2084 }
2085
2086 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2087 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2088 union {
2089 UVoidFunction *fp;
2090 void *vp;
2091 } uret;
2092 uret.fp = NULL;
2093 if(U_FAILURE(*status)) return uret.fp;
2094 uret.vp = dlsym(lib, sym);
2095 if(uret.vp == NULL) {
2096 #ifdef U_TRACE_DYLOAD
2097 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2098 #endif
2099 *status = U_MISSING_RESOURCE_ERROR;
2100 }
2101 return uret.fp;
2102 }
2103
2104 #else
2105
2106 /* null (nonexistent) implementation. */
2107
2108 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2109 uprv_dl_open(const char *libName, UErrorCode *status) {
2110 if(U_FAILURE(*status)) return NULL;
2111 *status = U_UNSUPPORTED_ERROR;
2112 return NULL;
2113 }
2114
2115 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2116 uprv_dl_close(void *lib, UErrorCode *status) {
2117 if(U_FAILURE(*status)) return;
2118 *status = U_UNSUPPORTED_ERROR;
2119 return;
2120 }
2121
2122
2123 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2124 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2125 if(U_SUCCESS(*status)) {
2126 *status = U_UNSUPPORTED_ERROR;
2127 }
2128 return (UVoidFunction*)NULL;
2129 }
2130
2131
2132
2133 #endif
2134
2135 #elif U_PLATFORM_USES_ONLY_WIN32_API
2136
2137 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2138 uprv_dl_open(const char *libName, UErrorCode *status) {
2139 HMODULE lib = NULL;
2140
2141 if(U_FAILURE(*status)) return NULL;
2142
2143 lib = LoadLibraryA(libName);
2144
2145 if(lib==NULL) {
2146 *status = U_MISSING_RESOURCE_ERROR;
2147 }
2148
2149 return (void*)lib;
2150 }
2151
2152 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2153 uprv_dl_close(void *lib, UErrorCode *status) {
2154 HMODULE handle = (HMODULE)lib;
2155 if(U_FAILURE(*status)) return;
2156
2157 FreeLibrary(handle);
2158
2159 return;
2160 }
2161
2162
2163 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2164 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2165 HMODULE handle = (HMODULE)lib;
2166 UVoidFunction* addr = NULL;
2167
2168 if(U_FAILURE(*status) || lib==NULL) return NULL;
2169
2170 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2171
2172 if(addr==NULL) {
2173 DWORD lastError = GetLastError();
2174 if(lastError == ERROR_PROC_NOT_FOUND) {
2175 *status = U_MISSING_RESOURCE_ERROR;
2176 } else {
2177 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2178 }
2179 }
2180
2181 return addr;
2182 }
2183
2184
2185 #else
2186
2187 /* No dynamic loading set. */
2188
2189 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2190 uprv_dl_open(const char *libName, UErrorCode *status) {
2191 if(U_FAILURE(*status)) return NULL;
2192 *status = U_UNSUPPORTED_ERROR;
2193 return NULL;
2194 }
2195
2196 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2197 uprv_dl_close(void *lib, UErrorCode *status) {
2198 if(U_FAILURE(*status)) return;
2199 *status = U_UNSUPPORTED_ERROR;
2200 return;
2201 }
2202
2203
2204 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2205 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2206 if(U_SUCCESS(*status)) {
2207 *status = U_UNSUPPORTED_ERROR;
2208 }
2209 return (UVoidFunction*)NULL;
2210 }
2211
2212 #endif /* U_ENABLE_DYLOAD */
2213
2214 /*
2215 * Hey, Emacs, please set the following:
2216 *
2217 * Local Variables:
2218 * indent-tabs-mode: nil
2219 * End:
2220 *
2221 */
2222