1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
38 */
39
40 /* Define _XOPEN_SOURCE for Solaris and friends. */
41 /* NetBSD needs it to be >= 4 */
42 #if !defined(_XOPEN_SOURCE)
43 #if __STDC_VERSION__ >= 199901L
44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
45 #define _XOPEN_SOURCE 600
46 #else
47 #define _XOPEN_SOURCE 4
48 #endif
49 #endif
50
51 /* Make sure things like readlink and such functions work.
52 Poorly upgraded Solaris machines can't have this defined.
53 Cleanly installed Solaris can use this #define.
54 */
55 #if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
56 #define _XOPEN_SOURCE_EXTENDED 1
57 #endif
58
59 /* include ICU headers */
60 #include "unicode/utypes.h"
61 #include "unicode/putil.h"
62 #include "unicode/ustring.h"
63 #include "putilimp.h"
64 #include "uassert.h"
65 #include "umutex.h"
66 #include "cmemory.h"
67 #include "cstring.h"
68 #include "locmap.h"
69 #include "ucln_cmn.h"
70
71 /* Include standard headers. */
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <string.h>
75 #include <math.h>
76 #include <locale.h>
77 #include <float.h>
78
79 /* include system headers */
80 #if defined(U_WINDOWS) || defined(U_MINGW)
81 # define WIN32_LEAN_AND_MEAN
82 # define VC_EXTRALEAN
83 # define NOUSER
84 # define NOSERVICE
85 # define NOIME
86 # define NOMCX
87 # include <windows.h>
88 # include "wintz.h"
89 #elif defined(OS400)
90 # include <float.h>
91 # include <qusec.h> /* error code structure */
92 # include <qusrjobi.h>
93 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
94 # include <mih/testptr.h> /* For uprv_maximumPtr */
95 #elif defined(XP_MAC)
96 # include <Files.h>
97 # include <IntlResources.h>
98 # include <Script.h>
99 # include <Folders.h>
100 # include <MacTypes.h>
101 # include <TextUtils.h>
102 # define ICU_NO_USER_DATA_OVERRIDE 1
103 #elif defined(OS390)
104 # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
105 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
106 # include <limits.h>
107 # include <unistd.h>
108 #elif defined(U_QNX)
109 # include <sys/neutrino.h>
110 #elif defined(U_SOLARIS)
111 # ifndef _XPG4_2
112 # define _XPG4_2
113 # endif
114 #endif
115
116 #if (defined(U_CYGWIN) || defined(U_MINGW)) && defined(__STRICT_ANSI__)
117 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
118 #undef __STRICT_ANSI__
119 #endif
120
121 /*
 * Cygwin with GCC requires time.h to be included after the block above that disables strict ANSI mode.
123 */
124 #include <time.h>
125
126 #if defined(U_DARWIN)
127 #include <TargetConditionals.h>
128 #endif
129
130 #ifndef U_WINDOWS
131 #include <sys/time.h>
132 #endif
133
134 /*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
137 *
138 */
139
140 #if U_HAVE_NL_LANGINFO_CODESET
141 #include <langinfo.h>
142 #endif
143
144 /**
145 * Simple things (presence of functions, etc) should just go in configure.in and be added to
146 * icucfg.h via autoheader.
147 */
148 #if defined(U_HAVE_ICUCFG)
149 #include "icucfg.h"
150 #endif
151
152 /* Define the extension for data files, again... */
153 #define DATA_TYPE "dat"
154
155 /* Leave this copyright notice here! */
156 static const char copyright[] = U_COPYRIGHT_STRING;
157
158 /* floating point implementations ------------------------------------------- */
159
160 /* We return QNAN rather than SNAN*/
161 #define SIGN 0x80000000U
162
/*
 * Overlay of a 64-bit integer and a double, used to spell special
 * floating-point values as raw bit patterns.
 * The integer member has to be declared first: a C89 brace initializer
 * only ever initializes the first member of a union.
 */
typedef union {
    int64_t i64;
    double  d64;
} BitPatternConversion;

/* Bit pattern handed out by uprv_getNaN() (a quiet NaN rather than a signaling one). */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };

/* Bit pattern of positive infinity; also the threshold used by the NaN/infinity tests. */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
170
171 /*---------------------------------------------------------------------------
172 Platform utilities
173 Our general strategy is to assume we're on a POSIX platform. Platforms which
174 are non-POSIX must declare themselves so. The default POSIX implementation
175 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
176 functions).
177 ---------------------------------------------------------------------------*/
178
179 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400) || defined(U_MINGW)
180 # undef U_POSIX_LOCALE
181 #else
182 # define U_POSIX_LOCALE 1
183 #endif
184
185 /*
186 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
187 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
188 */
189 #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian builds those are the first bytes of the object; otherwise
 * they are the last n bytes, so the result points n bytes before the end.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    (void)n;
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
199
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian builds those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    (void)n;
    return firstByte;
#endif
}
209 #endif /* !IEEE_754 */
210
211 #if IEEE_754
212 static UBool
u_signBit(double d)213 u_signBit(double d) {
214 uint8_t hiByte;
215 #if U_IS_BIG_ENDIAN
216 hiByte = *(uint8_t *)&d;
217 #else
218 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
219 #endif
220 return (hiByte & 0x80) != 0;
221 }
222 #endif
223
224
225
226 #if defined (U_DEBUG_FAKETIME)
227 /* Override the clock to test things without having to move the system clock.
228 * Assumes POSIX gettimeofday() will function
229 */
230 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
231 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
232 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
233 static UMTX fakeClockMutex = NULL;
234
getUTCtime_real()235 static UDate getUTCtime_real() {
236 struct timeval posixTime;
237 gettimeofday(&posixTime, NULL);
238 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
239 }
240
getUTCtime_fake()241 static UDate getUTCtime_fake() {
242 umtx_lock(&fakeClockMutex);
243 if(!fakeClock_set) {
244 UDate real = getUTCtime_real();
245 const char *fake_start = getenv("U_FAKETIME_START");
246 if((fake_start!=NULL) && (fake_start[0]!=0)) {
247 sscanf(fake_start,"%lf",&fakeClock_t0);
248 fakeClock_dt = fakeClock_t0 - real;
249 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
250 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
251 fakeClock_t0, fake_start, fakeClock_dt, real);
252 } else {
253 fakeClock_dt = 0;
254 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
255 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
256 }
257 fakeClock_set = TRUE;
258 }
259 umtx_unlock(&fakeClockMutex);
260
261 return getUTCtime_real() + fakeClock_dt;
262 }
263 #endif
264
265 #if defined(U_WINDOWS)
266 typedef union {
267 int64_t int64;
268 FILETIME fileTime;
269 } FileTimeConversion; /* This is like a ULARGE_INTEGER */
270
271 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
272 #define EPOCH_BIAS INT64_C(116444736000000000)
273 #define HECTONANOSECOND_PER_MILLISECOND 10000
274
275 #endif
276
277 /*---------------------------------------------------------------------------
278 Universal Implementations
279 These are designed to work on all platforms. Try these, and if they
280 don't work on your platform, then special case your platform with new
281 implementations.
282 ---------------------------------------------------------------------------*/
283
284 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()285 uprv_getUTCtime()
286 {
287 #if defined(U_DEBUG_FAKETIME)
288 return getUTCtime_fake(); /* Hook for overriding the clock */
289 #else
290 return uprv_getRawUTCtime();
291 #endif
292 }
293
294 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
295 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()296 uprv_getRawUTCtime()
297 {
298 #if defined(XP_MAC)
299 time_t t, t1, t2;
300 struct tm tmrec;
301
302 uprv_memset( &tmrec, 0, sizeof(tmrec) );
303 tmrec.tm_year = 70;
304 tmrec.tm_mon = 0;
305 tmrec.tm_mday = 1;
306 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
307
308 time(&t);
309 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
310 t2 = mktime(&tmrec); /* seconds of current GMT*/
311 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
312 #elif defined(U_WINDOWS)
313
314 FileTimeConversion winTime;
315 GetSystemTimeAsFileTime(&winTime.fileTime);
316 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
317 #else
318
319 #if defined(HAVE_GETTIMEOFDAY)
320 struct timeval posixTime;
321 gettimeofday(&posixTime, NULL);
322 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
323 #else
324 time_t epochtime;
325 time(&epochtime);
326 return (UDate)epochtime * U_MILLIS_PER_SECOND;
327 #endif
328
329 #endif
330 }
331
332 /*-----------------------------------------------------------------------------
333 IEEE 754
334 These methods detect and return NaN and infinity values for doubles
335 conforming to IEEE 754. Platforms which support this standard include X86,
336 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
337 If this doesn't work on your platform, you have non-IEEE floating-point, and
338 will need to code your own versions. A naive implementation is to return 0.0
339 for getNaN and getInfinity, and false for isNaN and isInfinite.
340 ---------------------------------------------------------------------------*/
341
342 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)343 uprv_isNaN(double number)
344 {
345 #if IEEE_754
346 BitPatternConversion convertedNumber;
347 convertedNumber.d64 = number;
348 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
349 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
350
351 #elif defined(OS390)
352 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
353 sizeof(uint32_t));
354 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
355 sizeof(uint32_t));
356
357 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
358 (lowBits == 0x00000000L);
359
360 #else
361 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
362 /* you'll need to replace this default implementation with what's correct*/
363 /* for your platform.*/
364 return number != number;
365 #endif
366 }
367
368 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)369 uprv_isInfinite(double number)
370 {
371 #if IEEE_754
372 BitPatternConversion convertedNumber;
373 convertedNumber.d64 = number;
374 /* Infinity is exactly 0x7FF0000000000000U. */
375 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
376 #elif defined(OS390)
377 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
378 sizeof(uint32_t));
379 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
380 sizeof(uint32_t));
381
382 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
383
384 #else
385 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
386 /* value, you'll need to replace this default implementation with what's*/
387 /* correct for your platform.*/
388 return number == (2.0 * number);
389 #endif
390 }
391
392 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)393 uprv_isPositiveInfinity(double number)
394 {
395 #if IEEE_754 || defined(OS390)
396 return (UBool)(number > 0 && uprv_isInfinite(number));
397 #else
398 return uprv_isInfinite(number);
399 #endif
400 }
401
402 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)403 uprv_isNegativeInfinity(double number)
404 {
405 #if IEEE_754 || defined(OS390)
406 return (UBool)(number < 0 && uprv_isInfinite(number));
407
408 #else
409 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
410 sizeof(uint32_t));
411 return((highBits & SIGN) && uprv_isInfinite(number));
412
413 #endif
414 }
415
416 U_CAPI double U_EXPORT2
uprv_getNaN()417 uprv_getNaN()
418 {
419 #if IEEE_754 || defined(OS390)
420 return gNan.d64;
421 #else
422 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
423 /* you'll need to replace this default implementation with what's correct*/
424 /* for your platform.*/
425 return 0.0;
426 #endif
427 }
428
429 U_CAPI double U_EXPORT2
uprv_getInfinity()430 uprv_getInfinity()
431 {
432 #if IEEE_754 || defined(OS390)
433 return gInf.d64;
434 #else
435 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
436 /* value, you'll need to replace this default implementation with what's*/
437 /* correct for your platform.*/
438 return 0.0;
439 #endif
440 }
441
442 U_CAPI double U_EXPORT2
uprv_floor(double x)443 uprv_floor(double x)
444 {
445 return floor(x);
446 }
447
448 U_CAPI double U_EXPORT2
uprv_ceil(double x)449 uprv_ceil(double x)
450 {
451 return ceil(x);
452 }
453
454 U_CAPI double U_EXPORT2
uprv_round(double x)455 uprv_round(double x)
456 {
457 return uprv_floor(x + 0.5);
458 }
459
460 U_CAPI double U_EXPORT2
uprv_fabs(double x)461 uprv_fabs(double x)
462 {
463 return fabs(x);
464 }
465
466 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)467 uprv_modf(double x, double* y)
468 {
469 return modf(x, y);
470 }
471
472 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)473 uprv_fmod(double x, double y)
474 {
475 return fmod(x, y);
476 }
477
478 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)479 uprv_pow(double x, double y)
480 {
481 /* This is declared as "double pow(double x, double y)" */
482 return pow(x, y);
483 }
484
485 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)486 uprv_pow10(int32_t x)
487 {
488 return pow(10.0, (double)x);
489 }
490
491 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)492 uprv_fmax(double x, double y)
493 {
494 #if IEEE_754
495 /* first handle NaN*/
496 if(uprv_isNaN(x) || uprv_isNaN(y))
497 return uprv_getNaN();
498
499 /* check for -0 and 0*/
500 if(x == 0.0 && y == 0.0 && u_signBit(x))
501 return y;
502
503 #endif
504
505 /* this should work for all flt point w/o NaN and Inf special cases */
506 return (x > y ? x : y);
507 }
508
509 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)510 uprv_fmin(double x, double y)
511 {
512 #if IEEE_754
513 /* first handle NaN*/
514 if(uprv_isNaN(x) || uprv_isNaN(y))
515 return uprv_getNaN();
516
517 /* check for -0 and 0*/
518 if(x == 0.0 && y == 0.0 && u_signBit(y))
519 return y;
520
521 #endif
522
523 /* this should work for all flt point w/o NaN and Inf special cases */
524 return (x > y ? y : x);
525 }
526
527 /**
528 * Truncates the given double.
529 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
530 * This is different than calling floor() or ceil():
531 * floor(3.3) = 3, floor(-3.3) = -4
532 * ceil(3.3) = 4, ceil(-3.3) = -3
533 */
534 U_CAPI double U_EXPORT2
uprv_trunc(double d)535 uprv_trunc(double d)
536 {
537 #if IEEE_754
538 /* handle error cases*/
539 if(uprv_isNaN(d))
540 return uprv_getNaN();
541 if(uprv_isInfinite(d))
542 return uprv_getInfinity();
543
544 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
545 return ceil(d);
546 else
547 return floor(d);
548
549 #else
550 return d >= 0 ? floor(d) : ceil(d);
551
552 #endif
553 }
554
555 /**
556 * Return the largest positive number that can be represented by an integer
557 * type of arbitrary bit length.
558 */
559 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)560 uprv_maxMantissa(void)
561 {
562 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
563 }
564
565 U_CAPI double U_EXPORT2
uprv_log(double d)566 uprv_log(double d)
567 {
568 return log(d);
569 }
570
571 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)572 uprv_maximumPtr(void * base)
573 {
574 #if defined(OS400)
575 /*
576 * With the provided function we should never be out of range of a given segment
577 * (a traditional/typical segment that is). Our segments have 5 bytes for the
578 * id and 3 bytes for the offset. The key is that the casting takes care of
579 * only retrieving the offset portion minus x1000. Hence, the smallest offset
580 * seen in a program is x001000 and when casted to an int would be 0.
581 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
582 *
583 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
584 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
585 * This function determines the activation based on the pointer that is passed in and
586 * calculates the appropriate maximum available size for
587 * each pointer type (TERASPACE and non-TERASPACE)
588 *
589 * Unlike other operating systems, the pointer model isn't determined at
590 * compile time on i5/OS.
591 */
592 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
593 /* if it is a TERASPACE pointer the max is 2GB - 4k */
594 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
595 }
596 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
597 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
598
599 #else
600 return U_MAX_PTR(base);
601 #endif
602 }
603
604 /*---------------------------------------------------------------------------
605 Platform-specific Implementations
606 Try these, and if they don't work on your platform, then special case your
607 platform with new implementations.
608 ---------------------------------------------------------------------------*/
609
610 /* Generic time zone layer -------------------------------------------------- */
611
612 /* Time zone utilities */
613 U_CAPI void U_EXPORT2
uprv_tzset()614 uprv_tzset()
615 {
616 #if defined(U_TZSET)
617 U_TZSET();
618 #else
619 /* no initialization*/
620 #endif
621 }
622
623 U_CAPI int32_t U_EXPORT2
uprv_timezone()624 uprv_timezone()
625 {
626 #ifdef U_TIMEZONE
627 return U_TIMEZONE;
628 #else
629 time_t t, t1, t2;
630 struct tm tmrec;
631 UBool dst_checked;
632 int32_t tdiff = 0;
633
634 time(&t);
635 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
636 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
637 t1 = mktime(&tmrec); /* local time in seconds*/
638 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
639 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
640 tdiff = t2 - t1;
641 /* imitate NT behaviour, which returns same timezone offset to GMT for
642 winter and summer*/
643 if (dst_checked)
644 tdiff += 3600;
645 return tdiff;
646 #endif
647 }
648
649 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
650 some platforms need to have it declared here. */
651
652 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
653 /* RS6000 and others reject char **tzname. */
654 extern U_IMPORT char *U_TZNAME[];
655 #endif
656
657 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
658 /* These platforms are likely to use Olson timezone IDs. */
659 #define CHECK_LOCALTIME_LINK 1
660 #if defined(U_DARWIN)
661 #include <tzfile.h>
662 #define TZZONEINFO (TZDIR "/")
663 #else
664 #define TZDEFAULT "/etc/localtime"
665 #define TZZONEINFO "/usr/share/zoneinfo/"
666 #endif
667 #if U_HAVE_DIRENT_H
668 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
669 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
670 symlinked to /etc/localtime, which makes searchForTZFile return
671 'localtime' when it's the first match. */
672 #define TZFILE_SKIP2 "localtime"
673 #define SEARCH_TZFILE
674 #include <dirent.h> /* Needed to search through system timezone files */
675 #endif
676 static char gTimeZoneBuffer[PATH_MAX];
677 static char *gTimeZoneBufferPtr = NULL;
678 #endif
679
680 #ifndef U_WINDOWS
681 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)682 static UBool isValidOlsonID(const char *id) {
683 int32_t idx = 0;
684
685 /* Determine if this is something like Iceland (Olson ID)
686 or AST4ADT (non-Olson ID) */
687 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
688 idx++;
689 }
690
691 /* If we went through the whole string, then it might be okay.
692 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
693 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
694 The rest of the time it could be an Olson ID. George */
695 return (UBool)(id[idx] == 0
696 || uprv_strcmp(id, "PST8PDT") == 0
697 || uprv_strcmp(id, "MST7MDT") == 0
698 || uprv_strcmp(id, "CST6CDT") == 0
699 || uprv_strcmp(id, "EST5EDT") == 0);
700 }
701
/*
 * Advances *id past a leading "posix/" or "right/" component, if present.
 *
 * On some Unix-like OS the 'posix' subdirectory of /usr/share/zoneinfo
 * replicates the top-level contents, and 'right' holds the same set of
 * names with different file contents (TAI, Int'l Atomic Time, instead of
 * UTC). When the first match for /etc/localtime is found in one of them
 * (usually 'posix'), or the TZ environment variable points into one of
 * them, createTimeZone fails because, say, 'posix/America/New_York' is
 * not an Olson timezone id while 'America/New_York' is. Hence the prefix
 * has to be skipped.
 */
static void skipZoneIDPrefix(const char** id) {
    const char *zone = *id;

    if (uprv_strncmp(zone, "posix/", 6) == 0 || uprv_strncmp(zone, "right/", 6) == 0) {
        *id = zone + 6;
    }
}
721 #endif
722
723 #if defined(U_TZNAME) && !defined(U_WINDOWS)
724
/* Converts an hour count to seconds. Argument and result are parenthesized so
   that expressions such as CONVERT_HOURS_TO_SECONDS(a + b) expand correctly. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))

/* One row of the abbreviation-to-Olson-ID lookup table below. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};

/*#define DEBUG_TZNAME*/

/*
 * Looks up the Olson ID for a pair of abbreviated zone names.
 *
 * stdID / dstID  standard and daylight abbreviations (exact, case-sensitive match)
 * daylightType   0 = no daylight time, 1 = daylight in June, 2 = daylight in December
 * offset         zone offset in seconds, matched against offsetSeconds
 *
 * Returns the olsonID of the first table row matching all four values,
 * or NULL when no row matches. The returned string is static table data.
 */
static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
{
    /* The whole quotient is cast, so the loop compares two signed values. */
    const int32_t mappingCount =
        (int32_t)(sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]));
    int32_t idx;
#ifdef DEBUG_TZNAME
    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
#endif
    for (idx = 0; idx < mappingCount; idx++)
    {
        const OffsetZoneMapping *entry = &OFFSET_ZONE_MAPPINGS[idx];

        if (offset == entry->offsetSeconds
            && daylightType == entry->daylightType
            && strcmp(entry->stdID, stdID) == 0
            && strcmp(entry->dstID, dstID) == 0)
        {
            return entry->olsonID;
        }
    }
    return NULL;
}
825 #endif
826
827 #ifdef SEARCH_TZFILE
828 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
829 #define MAX_READ_SIZE 512
830
831 typedef struct DefaultTZInfo {
832 char* defaultTZBuffer;
833 int64_t defaultTZFileSize;
834 FILE* defaultTZFilePtr;
835 UBool defaultTZstatus;
836 int32_t defaultTZPosition;
837 } DefaultTZInfo;
838
839 /*
840 * This method compares the two files given to see if they are a match.
841 * It is currently use to compare two TZ files.
842 */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)843 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
844 FILE* file;
845 int64_t sizeFile;
846 int64_t sizeFileLeft;
847 int32_t sizeFileRead;
848 int32_t sizeFileToRead;
849 char bufferFile[MAX_READ_SIZE];
850 UBool result = TRUE;
851
852 if (tzInfo->defaultTZFilePtr == NULL) {
853 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
854 }
855 file = fopen(TZFileName, "r");
856
857 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
858
859 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
860 /* First check that the file size are equal. */
861 if (tzInfo->defaultTZFileSize == 0) {
862 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
863 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
864 }
865 fseek(file, 0, SEEK_END);
866 sizeFile = ftell(file);
867 sizeFileLeft = sizeFile;
868
869 if (sizeFile != tzInfo->defaultTZFileSize) {
870 result = FALSE;
871 } else {
872 /* Store the data from the files in seperate buffers and
873 * compare each byte to determine equality.
874 */
875 if (tzInfo->defaultTZBuffer == NULL) {
876 rewind(tzInfo->defaultTZFilePtr);
877 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
878 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
879 }
880 rewind(file);
881 while(sizeFileLeft > 0) {
882 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
883 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
884
885 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
886 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
887 result = FALSE;
888 break;
889 }
890 sizeFileLeft -= sizeFileRead;
891 tzInfo->defaultTZPosition += sizeFileRead;
892 }
893 }
894 } else {
895 result = FALSE;
896 }
897
898 if (file != NULL) {
899 fclose(file);
900 }
901
902 return result;
903 }
904 /*
905 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
906 */
907 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
908 #define SKIP1 "."
909 #define SKIP2 ".."
910 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)911 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
912 char curpath[MAX_PATH_SIZE];
913 DIR* dirp = opendir(path);
914 DIR* subDirp = NULL;
915 struct dirent* dirEntry = NULL;
916
917 char* result = NULL;
918 if (dirp == NULL) {
919 return result;
920 }
921
922 /* Save the current path */
923 uprv_memset(curpath, 0, MAX_PATH_SIZE);
924 uprv_strcpy(curpath, path);
925
926 /* Check each entry in the directory. */
927 while((dirEntry = readdir(dirp)) != NULL) {
928 const char* dirName = dirEntry->d_name;
929 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
930 /* Create a newpath with the new entry to test each entry in the directory. */
931 char newpath[MAX_PATH_SIZE];
932 uprv_strcpy(newpath, curpath);
933 uprv_strcat(newpath, dirName);
934
935 if ((subDirp = opendir(newpath)) != NULL) {
936 /* If this new path is a directory, make a recursive call with the newpath. */
937 closedir(subDirp);
938 uprv_strcat(newpath, "/");
939 result = searchForTZFile(newpath, tzInfo);
940 /*
941 Have to get out here. Otherwise, we'd keep looking
942 and return the first match in the top-level directory
943 if there's a match in the top-level. If not, this function
944 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
945 It worked without this in most cases because we have a fallback of calling
946 localtime_r to figure out the default timezone.
947 */
948 if (result != NULL)
949 break;
950 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
951 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
952 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
953 skipZoneIDPrefix(&zoneid);
954 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
955 result = SEARCH_TZFILE_RESULT;
956 /* Get out after the first one found. */
957 break;
958 }
959 }
960 }
961 }
962 closedir(dirp);
963 return result;
964 }
965 #endif
966 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)967 uprv_tzname(int n)
968 {
969 const char *tzid = NULL;
970 #ifdef U_WINDOWS
971 tzid = uprv_detectWindowsTimeZone();
972
973 if (tzid != NULL) {
974 return tzid;
975 }
976 #else
977
978 /*#if defined(U_DARWIN)
979 int ret;
980
981 tzid = getenv("TZFILE");
982 if (tzid != NULL) {
983 return tzid;
984 }
985 #endif*/
986
987 /* This code can be temporarily disabled to test tzname resolution later on. */
988 #ifndef DEBUG_TZNAME
989 tzid = getenv("TZ");
990 if (tzid != NULL && isValidOlsonID(tzid))
991 {
992 /* This might be a good Olson ID. */
993 skipZoneIDPrefix(&tzid);
994 return tzid;
995 }
996 /* else U_TZNAME will give a better result. */
997 #endif
998
999 #if defined(CHECK_LOCALTIME_LINK)
1000 /* Caller must handle threading issues */
1001 if (gTimeZoneBufferPtr == NULL) {
1002 /*
1003 This is a trick to look at the name of the link to get the Olson ID
1004 because the tzfile contents is underspecified.
1005 This isn't guaranteed to work because it may not be a symlink.
1006 */
1007 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1008 if (0 < ret) {
1009 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1010 gTimeZoneBuffer[ret] = 0;
1011 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1012 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1013 {
1014 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1015 }
1016 } else {
1017 #if defined(SEARCH_TZFILE)
1018 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1019 if (tzInfo != NULL) {
1020 tzInfo->defaultTZBuffer = NULL;
1021 tzInfo->defaultTZFileSize = 0;
1022 tzInfo->defaultTZFilePtr = NULL;
1023 tzInfo->defaultTZstatus = FALSE;
1024 tzInfo->defaultTZPosition = 0;
1025
1026 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1027
1028 /* Free previously allocated memory */
1029 if (tzInfo->defaultTZBuffer != NULL) {
1030 uprv_free(tzInfo->defaultTZBuffer);
1031 }
1032 if (tzInfo->defaultTZFilePtr != NULL) {
1033 fclose(tzInfo->defaultTZFilePtr);
1034 }
1035 uprv_free(tzInfo);
1036 }
1037
1038 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1039 return gTimeZoneBufferPtr;
1040 }
1041 #endif
1042 }
1043 }
1044 else {
1045 return gTimeZoneBufferPtr;
1046 }
1047 #endif
1048 #endif
1049
1050 #ifdef U_TZNAME
1051 #if defined(U_WINDOWS) || defined(U_MINGW)
1052 /* The return value is free'd in timezone.cpp on Windows because
1053 * the other code path returns a pointer to a heap location. */
1054 return uprv_strdup(U_TZNAME[n]);
1055 #else
1056 /*
1057 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1058 So we remap the abbreviation to an olson ID.
1059
1060 Since Windows exposes a little more timezone information,
1061 we normally don't use this code on Windows because
1062 uprv_detectWindowsTimeZone should have already given the correct answer.
1063 */
1064 {
1065 struct tm juneSol, decemberSol;
1066 int daylightType;
1067 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1068 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1069
1070 /* This probing will tell us when daylight savings occurs. */
1071 localtime_r(&juneSolstice, &juneSol);
1072 localtime_r(&decemberSolstice, &decemberSol);
1073 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1074 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1075 if (tzid != NULL) {
1076 return tzid;
1077 }
1078 }
1079 return U_TZNAME[n];
1080 #endif
1081 #else
1082 return "";
1083 #endif
1084 }
1085
1086 /* Get and set the ICU data directory --------------------------------------- */
1087
1088 static char *gDataDirectory = NULL;
1089 #if U_POSIX_LOCALE
1090 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1091 #endif
1092
putil_cleanup(void)1093 static UBool U_CALLCONV putil_cleanup(void)
1094 {
1095 if (gDataDirectory && *gDataDirectory) {
1096 uprv_free(gDataDirectory);
1097 }
1098 gDataDirectory = NULL;
1099 #if U_POSIX_LOCALE
1100 if (gCorrectedPOSIXLocale) {
1101 uprv_free(gCorrectedPOSIXLocale);
1102 gCorrectedPOSIXLocale = NULL;
1103 }
1104 #endif
1105 return TRUE;
1106 }
1107
1108 /*
1109 * Set the data directory.
1110 * Make a copy of the passed string, and set the global data dir to point to it.
1111 * TODO: see bug #2849, regarding thread safety.
1112 */
1113 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1114 u_setDataDirectory(const char *directory) {
1115 char *newDataDir;
1116 int32_t length;
1117
1118 if(directory==NULL || *directory==0) {
1119 /* A small optimization to prevent the malloc and copy when the
1120 shared library is used, and this is a way to make sure that NULL
1121 is never returned.
1122 */
1123 newDataDir = (char *)"";
1124 }
1125 else {
1126 length=(int32_t)uprv_strlen(directory);
1127 newDataDir = (char *)uprv_malloc(length + 2);
1128 /* Exit out if newDataDir could not be created. */
1129 if (newDataDir == NULL) {
1130 return;
1131 }
1132 uprv_strcpy(newDataDir, directory);
1133
1134 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1135 {
1136 char *p;
1137 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1138 *p = U_FILE_SEP_CHAR;
1139 }
1140 }
1141 #endif
1142 }
1143
1144 umtx_lock(NULL);
1145 if (gDataDirectory && *gDataDirectory) {
1146 uprv_free(gDataDirectory);
1147 }
1148 gDataDirectory = newDataDir;
1149 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1150 umtx_unlock(NULL);
1151 }
1152
1153 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1154 uprv_pathIsAbsolute(const char *path)
1155 {
1156 if(!path || !*path) {
1157 return FALSE;
1158 }
1159
1160 if(*path == U_FILE_SEP_CHAR) {
1161 return TRUE;
1162 }
1163
1164 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1165 if(*path == U_FILE_ALT_SEP_CHAR) {
1166 return TRUE;
1167 }
1168 #endif
1169
1170 #if defined(U_WINDOWS)
1171 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1172 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1173 path[1] == ':' ) {
1174 return TRUE;
1175 }
1176 #endif
1177
1178 return FALSE;
1179 }
1180
1181 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1182 until some client wrapper makefiles are updated */
1183 #if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1184 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1185 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1186 # endif
1187 #endif
1188
1189 U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)1190 u_getDataDirectory(void) {
1191 const char *path = NULL;
1192 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1193 char datadir_path_buffer[PATH_MAX];
1194 #endif
1195
1196 /* if we have the directory, then return it immediately */
1197 UMTX_CHECK(NULL, gDataDirectory, path);
1198
1199 if(path) {
1200 return path;
1201 }
1202
1203 /*
1204 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1205 override ICU's data with the ICU_DATA environment variable. This prevents
1206 problems where multiple custom copies of ICU's specific version of data
1207 are installed on a system. Either the application must define the data
1208 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1209 ICU, set the data with udata_setCommonData or trust that all of the
1210 required data is contained in ICU's data library that contains
1211 the entry point defined by U_ICUDATA_ENTRY_POINT.
1212
1213 There may also be some platforms where environment variables
1214 are not allowed.
1215 */
1216 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1217 /* First try to get the environment variable */
1218 path=getenv("ICU_DATA");
1219 # endif
1220
1221 /* ICU_DATA_DIR may be set as a compile option.
1222 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1223 * and is used only when data is built in archive mode eliminating the need
1224 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1225 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1226 * set their own path.
1227 */
1228 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1229 if(path==NULL || *path==0) {
1230 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1231 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1232 # endif
1233 # ifdef ICU_DATA_DIR
1234 path=ICU_DATA_DIR;
1235 # else
1236 path=U_ICU_DATA_DEFAULT_DIR;
1237 # endif
1238 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1239 if (prefix != NULL) {
1240 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1241 path=datadir_path_buffer;
1242 }
1243 # endif
1244 }
1245 #endif
1246
1247 if(path==NULL) {
1248 /* It looks really bad, set it to something. */
1249 path = "";
1250 }
1251
1252 u_setDataDirectory(path);
1253 return gDataDirectory;
1254 }
1255
1256
1257
1258
1259
1260 /* Macintosh-specific locale information ------------------------------------ */
1261 #ifdef XP_MAC
1262
/*
 * One row of the classic Mac OS locale lookup table. A field holding
 * MAC_LC_MAGIC_NUMBER acts as a wildcard when the table is searched;
 * posixID is the locale ID reported for a matching row.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5
#define MAC_LC_INIT_NUMBER -9

/*
 * Lookup table keyed by date_region; the first matching row wins, and the
 * all-wildcard last row is the fallback. The comment under each row names
 * the region that row stands for.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },
    /* United States*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },
    /* France*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },
    /* Great Britain*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },
    /* Germany*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },
    /* Italy*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },
    /* Netherlands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },
    /* French for Belgium or Luxembourg*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },
    /* Sweden*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },
    /* Denmark*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" },
    /* Portugal*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" },
    /* French Canada*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    /* Israel (was "is_IS", which is Icelandic and duplicated region 21)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" },
    /* Japan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" },
    /* Australia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" },
    /* the Arabic world (?)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" },
    /* Finland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" },
    /* French for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" },
    /* German for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" },
    /* Greece*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" },
    /* Iceland ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
    /* Malta ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
    /* Cyprus ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" },
    /* Turkey ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" },
    /* Croatian system for Yugoslavia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
    /* Hindi system for India*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
    /* Pakistan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" },
    /* Lithuania*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" },
    /* Poland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" },
    /* Hungary*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" },
    /* Estonia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" },
    /* Latvia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
    /* Lapland  [Ask Rich for the data. HS]*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
    /* Faeroe Islands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" },
    /* Iran*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" },
    /* Russia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" },
    /* Ireland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" },
    /* Korea*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" },
    /* People's Republic of China*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" },
    /* Taiwan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" },
    /* Thailand*/

    /* fallback is en_US*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1362
1363 #endif
1364
1365 #if U_POSIX_LOCALE
/* Shared helper for uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Looks up the POSIX locale ID for
 * LC_CTYPE or LC_MESSAGES; any other category yields the default
 * "en_US_POSIX".
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char *id = NULL;

    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
         * Ask the C library first, because the application may have called
         * setlocale itself. On Solaris two calls can give different
         * answers, so callers should only query once.
         *
         * LC_ALL is not queried through setlocale because the format of its
         * result is platform dependent; the LANG environment variable seems
         * to affect LC_CTYPE by default. setlocale(LC_ALL, NULL) can return:
         *   HPUX:    'C C C C C C C'
         *   Solaris: /en_US/C/C/C/C/C on the second try
         *   Linux:   LC_CTYPE=C;LC_NUMERIC=C;...
         *
         * The default codepage detection also needs to use LC_CTYPE.
         *
         * Never pass "" instead of NULL to setlocale(LC_*, ...) here: that
         * would modify the libc behavior.
         */
        id = setlocale(category, NULL);
        if (id == NULL || !uprv_strcmp("C", id) || !uprv_strcmp("POSIX", id)) {
            const char *categoryVar = (category == LC_MESSAGES) ? "LC_MESSAGES" : "LC_CTYPE";

            /* Nothing useful from setlocale: consult the environment instead. */
            id = getenv("LC_ALL");
            if (id == NULL) {
                id = getenv(categoryVar);
            }
            if (id == NULL) {
                id = getenv("LANG");
            }
        }
    }

    if (id == NULL || !uprv_strcmp("C", id) || !uprv_strcmp("POSIX", id)) {
        /* Still nothing usable: hand back a sensible POSIX default. */
        id = "en_US_POSIX";
    }
    return id;
}
1416
/* Returns the raw POSIX ID of the default locale, taken from the
 * LC_MESSAGES category (and so indirectly from LC_ALL and LANG). The lookup
 * runs once; later calls return the remembered pointer.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char *cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1428
/* Returns the raw POSIX ID used for default codepage detection, taken from
 * the LC_CTYPE category (and so indirectly from LC_ALL and LANG). The lookup
 * runs once; later calls return the remembered pointer.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char *cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1440 #endif
1441
1442 /* NOTE: The caller should handle thread safety */
1443 U_CAPI const char* U_EXPORT2
uprv_getDefaultLocaleID()1444 uprv_getDefaultLocaleID()
1445 {
1446 #if U_POSIX_LOCALE
1447 /*
1448 Note that: (a '!' means the ID is improper somehow)
1449 LC_ALL ----> default_loc codepage
1450 --------------------------------------------------------
1451 ab.CD ab CD
1452 ab@CD ab__CD -
1453 ab@CD.EF ab__CD EF
1454
1455 ab_CD.EF@GH ab_CD_GH EF
1456
1457 Some 'improper' ways to do the same as above:
1458 ! ab_CD@GH.EF ab_CD_GH EF
1459 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1460 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1461
1462 _CD@GH _CD_GH -
1463 _CD.EF@GH _CD_GH EF
1464
1465 The variant cannot have dots in it.
1466 The 'rightmost' variant (@xxx) wins.
1467 The leftmost codepage (.xxx) wins.
1468 */
1469 char *correctedPOSIXLocale = 0;
1470 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1471 const char *p;
1472 const char *q;
1473 int32_t len;
1474
1475 /* Format: (no spaces)
1476 ll [ _CC ] [ . MM ] [ @ VV]
1477
1478 l = lang, C = ctry, M = charmap, V = variant
1479 */
1480
1481 if (gCorrectedPOSIXLocale != NULL) {
1482 return gCorrectedPOSIXLocale;
1483 }
1484
1485 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1486 /* assume new locale can't be larger than old one? */
1487 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1488 /* Exit on memory allocation error. */
1489 if (correctedPOSIXLocale == NULL) {
1490 return NULL;
1491 }
1492 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1493 correctedPOSIXLocale[p-posixID] = 0;
1494
1495 /* do not copy after the @ */
1496 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1497 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1498 }
1499 }
1500
1501 /* Note that we scan the *uncorrected* ID. */
1502 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1503 if (correctedPOSIXLocale == NULL) {
1504 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1505 /* Exit on memory allocation error. */
1506 if (correctedPOSIXLocale == NULL) {
1507 return NULL;
1508 }
1509 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1510 correctedPOSIXLocale[p-posixID] = 0;
1511 }
1512 p++;
1513
1514 /* Take care of any special cases here.. */
1515 if (!uprv_strcmp(p, "nynorsk")) {
1516 p = "NY";
1517 /* Don't worry about no__NY. In practice, it won't appear. */
1518 }
1519
1520 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1521 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1522 }
1523 else {
1524 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1525 }
1526
1527 if ((q = uprv_strchr(p, '.')) != NULL) {
1528 /* How big will the resulting string be? */
1529 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1530 uprv_strncat(correctedPOSIXLocale, p, q-p);
1531 correctedPOSIXLocale[len] = 0;
1532 }
1533 else {
1534 /* Anything following the @ sign */
1535 uprv_strcat(correctedPOSIXLocale, p);
1536 }
1537
1538 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1539 * How about 'russian' -> 'ru'?
1540 * Many of the other locales using ISO codes will be handled by the
1541 * canonicalization functions in uloc_getDefault.
1542 */
1543 }
1544
1545 /* Was a correction made? */
1546 if (correctedPOSIXLocale != NULL) {
1547 posixID = correctedPOSIXLocale;
1548 }
1549 else {
1550 /* copy it, just in case the original pointer goes away. See j2395 */
1551 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1552 /* Exit on memory allocation error. */
1553 if (correctedPOSIXLocale == NULL) {
1554 return NULL;
1555 }
1556 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1557 }
1558
1559 if (gCorrectedPOSIXLocale == NULL) {
1560 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1561 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1562 correctedPOSIXLocale = NULL;
1563 }
1564
1565 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1566 uprv_free(correctedPOSIXLocale);
1567 }
1568
1569 return posixID;
1570
1571 #elif defined(U_WINDOWS) || defined(U_MINGW)
1572 UErrorCode status = U_ZERO_ERROR;
1573 LCID id = GetThreadLocale();
1574 const char* locID = uprv_convertToPosix(id, &status);
1575
1576 if (U_FAILURE(status)) {
1577 locID = "en_US";
1578 }
1579 return locID;
1580
1581 #elif defined(XP_MAC)
1582 int32_t script = MAC_LC_INIT_NUMBER;
1583 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1584 int32_t region = MAC_LC_INIT_NUMBER;
1585 /* = GetScriptManagerVariable(smRegionCode);*/
1586 int32_t lang = MAC_LC_INIT_NUMBER;
1587 /* = GetScriptManagerVariable(smScriptLang);*/
1588 int32_t date_region = MAC_LC_INIT_NUMBER;
1589 const char* posixID = 0;
1590 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1591 int32_t i;
1592 Intl1Hndl ih;
1593
1594 ih = (Intl1Hndl) GetIntlResource(1);
1595 if (ih)
1596 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1597
1598 for (i = 0; i < count; i++) {
1599 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1600 || (mac_lc_recs[i].script == script))
1601 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1602 || (mac_lc_recs[i].region == region))
1603 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1604 || (mac_lc_recs[i].lang == lang))
1605 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1606 || (mac_lc_recs[i].date_region == date_region))
1607 )
1608 {
1609 posixID = mac_lc_recs[i].posixID;
1610 break;
1611 }
1612 }
1613
1614 return posixID;
1615
1616 #elif defined(OS400)
1617 /* locales are process scoped and are by definition thread safe */
1618 static char correctedLocale[64];
1619 const char *localeID = getenv("LC_ALL");
1620 char *p;
1621
1622 if (localeID == NULL)
1623 localeID = getenv("LANG");
1624 if (localeID == NULL)
1625 localeID = setlocale(LC_ALL, NULL);
1626 /* Make sure we have something... */
1627 if (localeID == NULL)
1628 return "en_US_POSIX";
1629
1630 /* Extract the locale name from the path. */
1631 if((p = uprv_strrchr(localeID, '/')) != NULL)
1632 {
1633 /* Increment p to start of locale name. */
1634 p++;
1635 localeID = p;
1636 }
1637
1638 /* Copy to work location. */
1639 uprv_strcpy(correctedLocale, localeID);
1640
1641 /* Strip off the '.locale' extension. */
1642 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1643 *p = 0;
1644 }
1645
1646 /* Upper case the locale name. */
1647 T_CString_toUpperCase(correctedLocale);
1648
1649 /* See if we are using the POSIX locale. Any of the
1650 * following are equivalent and use the same QLGPGCMA
1651 * (POSIX) locale.
1652 * QLGPGCMA2 means UCS2
1653 * QLGPGCMA_4 means UTF-32
1654 * QLGPGCMA_8 means UTF-8
1655 */
1656 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1657 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1658 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1659 {
1660 uprv_strcpy(correctedLocale, "en_US_POSIX");
1661 }
1662 else
1663 {
1664 int16_t LocaleLen;
1665
1666 /* Lower case the lang portion. */
1667 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1668 {
1669 *p = uprv_tolower(*p);
1670 }
1671
1672 /* Adjust for Euro. After '_E' add 'URO'. */
1673 LocaleLen = uprv_strlen(correctedLocale);
1674 if (correctedLocale[LocaleLen - 2] == '_' &&
1675 correctedLocale[LocaleLen - 1] == 'E')
1676 {
1677 uprv_strcat(correctedLocale, "URO");
1678 }
1679
1680 /* If using Lotus-based locale then convert to
1681 * equivalent non Lotus.
1682 */
1683 else if (correctedLocale[LocaleLen - 2] == '_' &&
1684 correctedLocale[LocaleLen - 1] == 'L')
1685 {
1686 correctedLocale[LocaleLen - 2] = 0;
1687 }
1688
1689 /* There are separate simplified and traditional
1690 * locales called zh_HK_S and zh_HK_T.
1691 */
1692 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1693 {
1694 uprv_strcpy(correctedLocale, "zh_HK");
1695 }
1696
1697 /* A special zh_CN_GBK locale...
1698 */
1699 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1700 {
1701 uprv_strcpy(correctedLocale, "zh_CN");
1702 }
1703
1704 }
1705
1706 return correctedLocale;
1707 #endif
1708
1709 }
1710
1711 #if !U_CHARSET_IS_UTF8
1712 #if U_POSIX_LOCALE
/*
Platforms sometimes report one charset name while really meaning another.
This function rewrites such names into ones ICU understands. Only
conflicting or ambiguous aliases belong here; before extending it, consider
adding a unique name to the ICU alias table in the data directory instead.

locale  Locale the name was reported for; NULL and "" are treated alike.
name    Charset name as reported by the platform.

Returns the (possibly remapped) name, or NULL when name is NULL or the
result is the empty string.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    const char *mapped;

    /* Treat an empty locale exactly like a missing one. */
    if (locale != NULL && locale[0] == 0) {
        locale = NULL;
    }
    if (name == NULL) {
        return NULL;
    }
    mapped = name;

#if defined(U_AIX)
    if (!uprv_strcmp(name, "IBM-943")) {
        /* Use the ASCII compatible ibm-943 */
        mapped = "Shift-JIS";
    }
    else if (!uprv_strcmp(name, "IBM-1252")) {
        /* Use the windows-1252 that contains the Euro */
        mapped = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && !uprv_strcmp(name, "EUC")) {
        /* Solaris underspecifies the "EUC" name. */
        if (!uprv_strcmp(locale, "zh_CN")) {
            mapped = "EUC-CN";
        }
        else if (!uprv_strcmp(locale, "zh_TW")) {
            mapped = "EUC-TW";
        }
        else if (!uprv_strcmp(locale, "ko_KR")) {
            mapped = "EUC-KR";
        }
    }
    else if (!uprv_strcmp(name, "eucJP")) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        mapped = "eucjis";
    }
    else if (!uprv_strcmp(name, "646")) {
        /*
         * Solaris reports 646 as the default codepage, yet its C library
         * routines behave as if it were ISO-8859-1 rather than US-ASCII(646).
         */
        mapped = "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        mapped = "UTF-8";
    }
    else if (!uprv_strcmp(name, "CP949")) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        mapped = "EUC-KR";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && !uprv_strcmp(name, "US-ASCII")) {
        /*
         * Outside the C/POSIX locale, prefer UTF-8 over US-ASCII.
         */
        mapped = "UTF-8";
    }
#elif defined(U_BSD)
    if (!uprv_strcmp(name, "CP949")) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        mapped = "EUC-KR";
    }
#elif defined(U_HPUX)
    if (locale != NULL && !uprv_strcmp(locale, "zh_HK") && !uprv_strcmp(name, "big5")) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        mapped = "hkbig5";
    }
    else if (!uprv_strcmp(name, "eucJP")) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        mapped = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && !uprv_strcmp(name, "euc")) {
        /* Linux underspecifies the "EUC" name. */
        if (!uprv_strcmp(locale, "korean")) {
            mapped = "EUC-KR";
        }
        else if (!uprv_strcmp(locale, "japanese")) {
            /* See comment below about eucJP */
            mapped = "eucjis";
        }
    }
    else if (!uprv_strcmp(name, "eucjp")) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        mapped = "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (!uprv_strcmp(name, "ANSI_X3.4-1968") || !uprv_strcmp(name, "US-ASCII"))) {
        /*
         * Outside the C/POSIX locale, prefer UTF-8 over US-ASCII.
         */
        mapped = "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif

    /* An empty name is reported to the caller as NULL. */
    return (mapped[0] != 0) ? mapped : NULL;
}
1841
1842 static const char*
getCodepageFromPOSIXID(const char * localeName,char * buffer,int32_t buffCapacity)1843 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1844 {
1845 char localeBuf[100];
1846 const char *name = NULL;
1847 char *variant = NULL;
1848
1849 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1850 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1851 uprv_strncpy(localeBuf, localeName, localeCapacity);
1852 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1853 name = uprv_strncpy(buffer, name+1, buffCapacity);
1854 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1855 if ((variant = (uprv_strchr(name, '@'))) != NULL) {
1856 *variant = 0;
1857 }
1858 name = remapPlatformDependentCodepage(localeBuf, name);
1859 }
1860 return name;
1861 }
1862 #endif
1863
/*
 * Work out the name of the platform's default codepage.
 *
 * Exactly one branch is compiled, selected by the platform macros:
 *  - OS400:      "ibm-<ccsid>", using the job CCSID reported by QUSRJOBI
 *                (falling back to the job's default CCSID, then to 37).
 *  - OS390:      nl_langinfo(CODESET) with UCNV_SWAP_LFNL_OPTION_STRING appended.
 *  - XP_MAC:     the constant "macintosh".
 *  - U_WINDOWS:  "windows-<ACP>", using GetACP().
 *  - U_POSIX_LOCALE: nl_langinfo() when available, otherwise the codeset
 *                embedded in the POSIX locale ID, otherwise "US-ASCII".
 *  - otherwise:  the constant "US-ASCII".
 *
 * The returned pointer refers either to a string literal or to a static
 * buffer inside this function; the caller must not free it.
 */
static const char*
int_getDefaultCodepage(void)
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    /*
     * QUSRJOBI takes fixed-width, blank-padded fields: a CHAR(26) qualified
     * job name ("*" = current job) and a CHAR(16) internal job identifier.
     * The literals below must keep exactly those widths.
     */
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned: use a matching conversion specifier. */
    sprintf(codepage, "ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* Room left for the codeset name once the option suffix and NUL are accounted for. */
    const size_t maxNameLength = sizeof(codepage) - 1 - strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    strncpy(codepage, nl_langinfo(CODESET), maxNameLength);
    /* strncpy does not terminate on truncation; terminate before strcat scans the buffer. */
    codepage[maxNameLength] = 0;
    strcat(codepage, UCNV_SWAP_LFNL_OPTION_STRING);

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* GetACP() returns an unsigned value: use a matching conversion specifier. */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    localeName = uprv_getPOSIXIDForDefaultCodepage();
    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII.
         */
        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            codeset = remapPlatformDependentCodepage(localeName, codeset);
        } else
#endif
        {
            codeset = remapPlatformDependentCodepage(NULL, codeset);
        }

        if (codeset != NULL) {
            /* Copy out of the library-owned buffer into our own static storage. */
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1959
1960
1961 U_CAPI const char* U_EXPORT2
uprv_getDefaultCodepage()1962 uprv_getDefaultCodepage()
1963 {
1964 static char const *name = NULL;
1965 umtx_lock(NULL);
1966 if (name == NULL) {
1967 name = int_getDefaultCodepage();
1968 }
1969 umtx_unlock(NULL);
1970 return name;
1971 }
1972 #endif /* !U_CHARSET_IS_UTF8 */
1973
1974
1975 /* end of platform-specific implementation -------------- */
1976
1977 /* version handling --------------------------------------------------------- */
1978
1979 U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray,const char * versionString)1980 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1981 char *end;
1982 uint16_t part=0;
1983
1984 if(versionArray==NULL) {
1985 return;
1986 }
1987
1988 if(versionString!=NULL) {
1989 for(;;) {
1990 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1991 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1992 break;
1993 }
1994 versionString=end+1;
1995 }
1996 }
1997
1998 while(part<U_MAX_VERSION_LENGTH) {
1999 versionArray[part++]=0;
2000 }
2001 }
2002
2003 U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray,const UChar * versionString)2004 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2005 if(versionArray!=NULL && versionString!=NULL) {
2006 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2007 int32_t len = u_strlen(versionString);
2008 if(len>U_MAX_VERSION_STRING_LENGTH) {
2009 len = U_MAX_VERSION_STRING_LENGTH;
2010 }
2011 u_UCharsToChars(versionString, versionChars, len);
2012 versionChars[len]=0;
2013 u_versionFromString(versionArray, versionChars);
2014 }
2015 }
2016
2017 U_CAPI void U_EXPORT2
u_versionToString(UVersionInfo versionArray,char * versionString)2018 u_versionToString(UVersionInfo versionArray, char *versionString) {
2019 uint16_t count, part;
2020 uint8_t field;
2021
2022 if(versionString==NULL) {
2023 return;
2024 }
2025
2026 if(versionArray==NULL) {
2027 versionString[0]=0;
2028 return;
2029 }
2030
2031 /* count how many fields need to be written */
2032 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2033 }
2034
2035 if(count <= 1) {
2036 count = 2;
2037 }
2038
2039 /* write the first part */
2040 /* write the decimal field value */
2041 field=versionArray[0];
2042 if(field>=100) {
2043 *versionString++=(char)('0'+field/100);
2044 field%=100;
2045 }
2046 if(field>=10) {
2047 *versionString++=(char)('0'+field/10);
2048 field%=10;
2049 }
2050 *versionString++=(char)('0'+field);
2051
2052 /* write the following parts */
2053 for(part=1; part<count; ++part) {
2054 /* write a dot first */
2055 *versionString++=U_VERSION_DELIMITER;
2056
2057 /* write the decimal field value */
2058 field=versionArray[part];
2059 if(field>=100) {
2060 *versionString++=(char)('0'+field/100);
2061 field%=100;
2062 }
2063 if(field>=10) {
2064 *versionString++=(char)('0'+field/10);
2065 field%=10;
2066 }
2067 *versionString++=(char)('0'+field);
2068 }
2069
2070 /* NUL-terminate */
2071 *versionString=0;
2072 }
2073
2074 U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray)2075 u_getVersion(UVersionInfo versionArray) {
2076 u_versionFromString(versionArray, U_ICU_VERSION);
2077 }
2078
2079 /**
2080 * icucfg.h dependent code
2081 */
2082
2083 #if U_ENABLE_DYLOAD
2084
2085 #if defined(U_CHECK_DYLOAD)
2086
2087 #if defined(HAVE_DLOPEN)
2088
2089 #ifdef HAVE_DLFCN_H
2090 #ifdef __MVS__
2091 #ifndef __SUSV3
2092 #define __SUSV3 1
2093 #endif
2094 #endif
2095 #include <dlfcn.h>
2096 #endif
2097
2098 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2099 uprv_dl_open(const char *libName, UErrorCode *status) {
2100 void *ret = NULL;
2101 if(U_FAILURE(*status)) return ret;
2102 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2103 if(ret==NULL) {
2104 #ifndef U_TRACE_DYLOAD
2105 perror("dlopen");
2106 #endif
2107 *status = U_MISSING_RESOURCE_ERROR;
2108 }
2109 return ret;
2110 }
2111
2112 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2113 uprv_dl_close(void *lib, UErrorCode *status) {
2114 if(U_FAILURE(*status)) return;
2115 dlclose(lib);
2116 }
2117
2118 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2119 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2120 union {
2121 void* voidPtr;
2122 UVoidFunction* voidFunc;
2123 } ret;
2124 ret.voidPtr = NULL;
2125 if(U_FAILURE(*status)) return NULL;
2126 /*
2127 * ISO forbids the following cast, but it's needed for dlsym.
2128 * See: http://pubs.opengroup.org/onlinepubs/009695399/functions/dlsym.html
2129 * See: http://www.trilithium.com/johan/2004/12/problem-with-dlsym/
2130 */
2131 ret.voidPtr = dlsym(lib, sym);
2132 if(ret.voidPtr == NULL) {
2133 *status = U_MISSING_RESOURCE_ERROR;
2134 }
2135 return ret.voidFunc;
2136 }
2137
2138 #else
2139
2140 /* null (nonexistent) implementation. */
2141
2142 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2143 uprv_dl_open(const char *libName, UErrorCode *status) {
2144 if(U_FAILURE(*status)) return NULL;
2145 *status = U_UNSUPPORTED_ERROR;
2146 return NULL;
2147 }
2148
2149 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2150 uprv_dl_close(void *lib, UErrorCode *status) {
2151 if(U_FAILURE(*status)) return;
2152 *status = U_UNSUPPORTED_ERROR;
2153 return;
2154 }
2155
2156
2157 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2158 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2159 if(U_SUCCESS(*status)) {
2160 *status = U_UNSUPPORTED_ERROR;
2161 }
2162 return (UVoidFunction*)NULL;
2163 }
2164
2165
2166
2167 #endif
2168
2169 #elif defined U_WINDOWS
2170
2171 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2172 uprv_dl_open(const char *libName, UErrorCode *status) {
2173 HMODULE lib = NULL;
2174
2175 if(U_FAILURE(*status)) return NULL;
2176
2177 lib = LoadLibraryA(libName);
2178
2179 if(lib==NULL) {
2180 *status = U_MISSING_RESOURCE_ERROR;
2181 }
2182
2183 return (void*)lib;
2184 }
2185
2186 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2187 uprv_dl_close(void *lib, UErrorCode *status) {
2188 HMODULE handle = (HMODULE)lib;
2189 if(U_FAILURE(*status)) return;
2190
2191 FreeLibrary(handle);
2192
2193 return;
2194 }
2195
2196
2197 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2198 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2199 HMODULE handle = (HMODULE)lib;
2200 UVoidFunction* addr = NULL;
2201
2202 if(U_FAILURE(*status) || lib==NULL) return NULL;
2203
2204 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2205
2206 if(addr==NULL) {
2207 DWORD lastError = GetLastError();
2208 if(lastError == ERROR_PROC_NOT_FOUND) {
2209 *status = U_MISSING_RESOURCE_ERROR;
2210 } else {
2211 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2212 }
2213 }
2214
2215 return addr;
2216 }
2217
2218
2219 #else
2220
2221 /* No dynamic loading set. */
2222
2223 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2224 uprv_dl_open(const char *libName, UErrorCode *status) {
2225 if(U_FAILURE(*status)) return NULL;
2226 *status = U_UNSUPPORTED_ERROR;
2227 return NULL;
2228 }
2229
2230 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2231 uprv_dl_close(void *lib, UErrorCode *status) {
2232 if(U_FAILURE(*status)) return;
2233 *status = U_UNSUPPORTED_ERROR;
2234 return;
2235 }
2236
2237
2238 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2239 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2240 if(U_SUCCESS(*status)) {
2241 *status = U_UNSUPPORTED_ERROR;
2242 }
2243 return (UVoidFunction*)NULL;
2244 }
2245
2246
2247 #endif
2248
2249 #endif /* U_ENABLE_DYLOAD */
2250
2251 /*
2252 * Hey, Emacs, please set the following:
2253 *
2254 * Local Variables:
2255 * indent-tabs-mode: nil
2256 * End:
2257 *
2258 */
2259