1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 * 01/03/08 Steven L. Fake Time Support
37 ******************************************************************************
38 */
39
40 /* Define _XOPEN_SOURCE for access to POSIX functions. */
41 #ifdef _XOPEN_SOURCE
42 /* Use the predefined value. */
43 #else
44 /*
45 * Version 6.0:
46 * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition)
47 * also known as
48 * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03)
49 */
50 # define _XOPEN_SOURCE 600
51 #endif
52
53 /* Make sure things like readlink and such functions work.
54 Poorly upgraded Solaris machines can't have this defined.
55 Cleanly installed Solaris can use this #define.
56 */
57 #if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
58 #define _XOPEN_SOURCE_EXTENDED 1
59 #endif
60
61 /* include ICU headers */
62 #include "unicode/utypes.h"
63 #include "unicode/putil.h"
64 #include "unicode/ustring.h"
65 #include "putilimp.h"
66 #include "uassert.h"
67 #include "umutex.h"
68 #include "cmemory.h"
69 #include "cstring.h"
70 #include "locmap.h"
71 #include "ucln_cmn.h"
72
73 /* Include standard headers. */
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <math.h>
78 #include <locale.h>
79 #include <float.h>
80 #include <time.h>
81
82 /* include system headers */
83 #ifdef U_WINDOWS
84 # define WIN32_LEAN_AND_MEAN
85 # define VC_EXTRALEAN
86 # define NOUSER
87 # define NOSERVICE
88 # define NOIME
89 # define NOMCX
90 # include <windows.h>
91 # include "wintz.h"
92 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
93 /* tzset isn't defined in strict ANSI on Cygwin. */
94 # undef __STRICT_ANSI__
95 #elif defined(OS400)
96 # include <float.h>
97 # include <qusec.h> /* error code structure */
98 # include <qusrjobi.h>
99 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
100 # include <mih/testptr.h> /* For uprv_maximumPtr */
101 #elif defined(XP_MAC)
102 # include <Files.h>
103 # include <IntlResources.h>
104 # include <Script.h>
105 # include <Folders.h>
106 # include <MacTypes.h>
107 # include <TextUtils.h>
108 # define ICU_NO_USER_DATA_OVERRIDE 1
109 #elif defined(OS390)
110 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
111 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
112 #include <limits.h>
113 #include <unistd.h>
114 #elif defined(U_QNX)
115 #include <sys/neutrino.h>
116 #elif defined(U_SOLARIS)
117 # ifndef _XPG4_2
118 # define _XPG4_2
119 # endif
120 #endif
121
122
123 #if defined(U_DARWIN)
124 #include <TargetConditionals.h>
125 #endif
126
127 #ifndef U_WINDOWS
128 #include <sys/time.h>
129 #endif
130
131 /*
132 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
134 *
135 */
136
137 #if U_HAVE_NL_LANGINFO_CODESET
138 #include <langinfo.h>
139 #endif
140
141 /**
142 * Simple things (presence of functions, etc) should just go in configure.in and be added to
143 * icucfg.h via autoheader.
144 */
145 #if defined(HAVE_CONFIG_H)
146 #include "icucfg.h"
147 #endif
148
149 /* Define the extension for data files, again... */
150 #define DATA_TYPE "dat"
151
152 /* Leave this copyright notice here! */
153 static const char copyright[] = U_COPYRIGHT_STRING;
154
155 /* floating point implementations ------------------------------------------- */
156
/* Mask for the sign bit of a 32-bit word. */
#define SIGN 0x80000000U

/*
 * Overlay of a 64-bit integer and a double, used to spell out doubles as
 * explicit bit patterns.
 */
typedef union {
    /* The integer member has to come first: a C89 brace initializer always
       initializes the first member of a union. */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* The NaN handed out by this file; a quiet NaN (QNAN) rather than a signaling one. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };

/* The bit pattern of infinity with the sign bit clear. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
167
168 /*---------------------------------------------------------------------------
169 Platform utilities
170 Our general strategy is to assume we're on a POSIX platform. Platforms which
171 are non-POSIX must declare themselves so. The default POSIX implementation
172 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
173 functions).
174 ---------------------------------------------------------------------------*/
175
176 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
177 # undef U_POSIX_LOCALE
178 #else
179 # define U_POSIX_LOCALE 1
180 #endif
181
182 /*
183 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
184 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
185 */
186 #if !IEEE_754
/*
 * Returns a pointer to the n bytes at the high-order end of *d, where the
 * high-order end is chosen by U_IS_BIG_ENDIAN: the start of the storage on
 * big-endian builds, the last n bytes otherwise.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
196
/*
 * Returns a pointer to the n bytes at the low-order end of *d, where the
 * low-order end is chosen by U_IS_BIG_ENDIAN: the last n bytes of the
 * storage on big-endian builds, the start of the storage otherwise.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
206 #endif /* !IEEE_754 */
207
208 #if IEEE_754
209 static UBool
u_signBit(double d)210 u_signBit(double d) {
211 uint8_t hiByte;
212 #if U_IS_BIG_ENDIAN
213 hiByte = *(uint8_t *)&d;
214 #else
215 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
216 #endif
217 return (hiByte & 0x80) != 0;
218 }
219 #endif
220
221
222
223 #if defined (U_DEBUG_FAKETIME)
224 /* Override the clock to test things without having to move the system clock.
225 * Assumes POSIX gettimeofday() will function
226 */
227 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
228 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
229 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
230 static UMTX fakeClockMutex = NULL;
231
getUTCtime_real()232 static UDate getUTCtime_real() {
233 struct timeval posixTime;
234 gettimeofday(&posixTime, NULL);
235 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
236 }
237
getUTCtime_fake()238 static UDate getUTCtime_fake() {
239 umtx_lock(&fakeClockMutex);
240 if(!fakeClock_set) {
241 UDate real = getUTCtime_real();
242 const char *fake_start = getenv("U_FAKETIME_START");
243 if((fake_start!=NULL) && (fake_start[0]!=0)) {
244 sscanf(fake_start,"%lf",&fakeClock_t0);
245 fakeClock_dt = fakeClock_t0 - real;
246 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
247 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
248 fakeClock_t0, fake_start, fakeClock_dt, real);
249 } else {
250 fakeClock_dt = 0;
251 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
252 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
253 }
254 fakeClock_set = TRUE;
255 }
256 umtx_unlock(&fakeClockMutex);
257
258 return getUTCtime_real() + fakeClock_dt;
259 }
260 #endif
261
262 #if defined(U_WINDOWS)
263 typedef union {
264 int64_t int64;
265 FILETIME fileTime;
266 } FileTimeConversion; /* This is like a ULARGE_INTEGER */
267
268 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
269 #define EPOCH_BIAS INT64_C(116444736000000000)
270 #define HECTONANOSECOND_PER_MILLISECOND 10000
271
272 #endif
273
274 /*---------------------------------------------------------------------------
275 Universal Implementations
276 These are designed to work on all platforms. Try these, and if they
277 don't work on your platform, then special case your platform with new
278 implementations.
279 ---------------------------------------------------------------------------*/
280
281 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()282 uprv_getUTCtime()
283 {
284 #if defined(U_DEBUG_FAKETIME)
285 return getUTCtime_fake(); /* Hook for overriding the clock */
286 #else
287 return uprv_getRawUTCtime();
288 #endif
289 }
290
291 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
292 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()293 uprv_getRawUTCtime()
294 {
295 #if defined(XP_MAC)
296 time_t t, t1, t2;
297 struct tm tmrec;
298
299 uprv_memset( &tmrec, 0, sizeof(tmrec) );
300 tmrec.tm_year = 70;
301 tmrec.tm_mon = 0;
302 tmrec.tm_mday = 1;
303 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
304
305 time(&t);
306 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
307 t2 = mktime(&tmrec); /* seconds of current GMT*/
308 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
309 #elif defined(U_WINDOWS)
310
311 FileTimeConversion winTime;
312 GetSystemTimeAsFileTime(&winTime.fileTime);
313 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
314 #else
315
316 #if defined(HAVE_GETTIMEOFDAY)
317 struct timeval posixTime;
318 gettimeofday(&posixTime, NULL);
319 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
320 #else
321 time_t epochtime;
322 time(&epochtime);
323 return (UDate)epochtime * U_MILLIS_PER_SECOND;
324 #endif
325
326 #endif
327 }
328
329 /*-----------------------------------------------------------------------------
330 IEEE 754
331 These methods detect and return NaN and infinity values for doubles
332 conforming to IEEE 754. Platforms which support this standard include X86,
333 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
334 If this doesn't work on your platform, you have non-IEEE floating-point, and
335 will need to code your own versions. A naive implementation is to return 0.0
336 for getNaN and getInfinity, and false for isNaN and isInfinite.
337 ---------------------------------------------------------------------------*/
338
339 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)340 uprv_isNaN(double number)
341 {
342 #if IEEE_754
343 BitPatternConversion convertedNumber;
344 convertedNumber.d64 = number;
345 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
346 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
347
348 #elif defined(OS390)
349 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
350 sizeof(uint32_t));
351 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
352 sizeof(uint32_t));
353
354 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
355 (lowBits == 0x00000000L);
356
357 #else
358 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
359 /* you'll need to replace this default implementation with what's correct*/
360 /* for your platform.*/
361 return number != number;
362 #endif
363 }
364
365 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)366 uprv_isInfinite(double number)
367 {
368 #if IEEE_754
369 BitPatternConversion convertedNumber;
370 convertedNumber.d64 = number;
371 /* Infinity is exactly 0x7FF0000000000000U. */
372 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
373 #elif defined(OS390)
374 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
375 sizeof(uint32_t));
376 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
377 sizeof(uint32_t));
378
379 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
380
381 #else
382 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
383 /* value, you'll need to replace this default implementation with what's*/
384 /* correct for your platform.*/
385 return number == (2.0 * number);
386 #endif
387 }
388
389 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)390 uprv_isPositiveInfinity(double number)
391 {
392 #if IEEE_754 || defined(OS390)
393 return (UBool)(number > 0 && uprv_isInfinite(number));
394 #else
395 return uprv_isInfinite(number);
396 #endif
397 }
398
399 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)400 uprv_isNegativeInfinity(double number)
401 {
402 #if IEEE_754 || defined(OS390)
403 return (UBool)(number < 0 && uprv_isInfinite(number));
404
405 #else
406 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
407 sizeof(uint32_t));
408 return((highBits & SIGN) && uprv_isInfinite(number));
409
410 #endif
411 }
412
413 U_CAPI double U_EXPORT2
uprv_getNaN()414 uprv_getNaN()
415 {
416 #if IEEE_754 || defined(OS390)
417 return gNan.d64;
418 #else
419 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
420 /* you'll need to replace this default implementation with what's correct*/
421 /* for your platform.*/
422 return 0.0;
423 #endif
424 }
425
426 U_CAPI double U_EXPORT2
uprv_getInfinity()427 uprv_getInfinity()
428 {
429 #if IEEE_754 || defined(OS390)
430 return gInf.d64;
431 #else
432 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
433 /* value, you'll need to replace this default implementation with what's*/
434 /* correct for your platform.*/
435 return 0.0;
436 #endif
437 }
438
439 U_CAPI double U_EXPORT2
uprv_floor(double x)440 uprv_floor(double x)
441 {
442 return floor(x);
443 }
444
445 U_CAPI double U_EXPORT2
uprv_ceil(double x)446 uprv_ceil(double x)
447 {
448 return ceil(x);
449 }
450
451 U_CAPI double U_EXPORT2
uprv_round(double x)452 uprv_round(double x)
453 {
454 return uprv_floor(x + 0.5);
455 }
456
457 U_CAPI double U_EXPORT2
uprv_fabs(double x)458 uprv_fabs(double x)
459 {
460 return fabs(x);
461 }
462
463 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)464 uprv_modf(double x, double* y)
465 {
466 return modf(x, y);
467 }
468
469 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)470 uprv_fmod(double x, double y)
471 {
472 return fmod(x, y);
473 }
474
475 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)476 uprv_pow(double x, double y)
477 {
478 /* This is declared as "double pow(double x, double y)" */
479 return pow(x, y);
480 }
481
482 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)483 uprv_pow10(int32_t x)
484 {
485 return pow(10.0, (double)x);
486 }
487
488 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)489 uprv_fmax(double x, double y)
490 {
491 #if IEEE_754
492 /* first handle NaN*/
493 if(uprv_isNaN(x) || uprv_isNaN(y))
494 return uprv_getNaN();
495
496 /* check for -0 and 0*/
497 if(x == 0.0 && y == 0.0 && u_signBit(x))
498 return y;
499
500 #endif
501
502 /* this should work for all flt point w/o NaN and Inf special cases */
503 return (x > y ? x : y);
504 }
505
506 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)507 uprv_fmin(double x, double y)
508 {
509 #if IEEE_754
510 /* first handle NaN*/
511 if(uprv_isNaN(x) || uprv_isNaN(y))
512 return uprv_getNaN();
513
514 /* check for -0 and 0*/
515 if(x == 0.0 && y == 0.0 && u_signBit(y))
516 return y;
517
518 #endif
519
520 /* this should work for all flt point w/o NaN and Inf special cases */
521 return (x > y ? y : x);
522 }
523
524 /**
525 * Truncates the given double.
526 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
527 * This is different than calling floor() or ceil():
528 * floor(3.3) = 3, floor(-3.3) = -4
529 * ceil(3.3) = 4, ceil(-3.3) = -3
530 */
531 U_CAPI double U_EXPORT2
uprv_trunc(double d)532 uprv_trunc(double d)
533 {
534 #if IEEE_754
535 /* handle error cases*/
536 if(uprv_isNaN(d))
537 return uprv_getNaN();
538 if(uprv_isInfinite(d))
539 return uprv_getInfinity();
540
541 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
542 return ceil(d);
543 else
544 return floor(d);
545
546 #else
547 return d >= 0 ? floor(d) : ceil(d);
548
549 #endif
550 }
551
552 /**
553 * Return the largest positive number that can be represented by an integer
554 * type of arbitrary bit length.
555 */
556 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)557 uprv_maxMantissa(void)
558 {
559 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
560 }
561
562 U_CAPI double U_EXPORT2
uprv_log(double d)563 uprv_log(double d)
564 {
565 return log(d);
566 }
567
568 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)569 uprv_maximumPtr(void * base)
570 {
571 #if defined(OS400)
572 /*
573 * With the provided function we should never be out of range of a given segment
574 * (a traditional/typical segment that is). Our segments have 5 bytes for the
575 * id and 3 bytes for the offset. The key is that the casting takes care of
576 * only retrieving the offset portion minus x1000. Hence, the smallest offset
577 * seen in a program is x001000 and when casted to an int would be 0.
578 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
579 *
580 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
581 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
582 * This function determines the activation based on the pointer that is passed in and
583 * calculates the appropriate maximum available size for
584 * each pointer type (TERASPACE and non-TERASPACE)
585 *
586 * Unlike other operating systems, the pointer model isn't determined at
587 * compile time on i5/OS.
588 */
589 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
590 /* if it is a TERASPACE pointer the max is 2GB - 4k */
591 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
592 }
593 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
594 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
595
596 #else
597 return U_MAX_PTR(base);
598 #endif
599 }
600
601 /*---------------------------------------------------------------------------
602 Platform-specific Implementations
603 Try these, and if they don't work on your platform, then special case your
604 platform with new implementations.
605 ---------------------------------------------------------------------------*/
606
607 /* Generic time zone layer -------------------------------------------------- */
608
609 /* Time zone utilities */
610 U_CAPI void U_EXPORT2
uprv_tzset()611 uprv_tzset()
612 {
613 #ifdef U_TZSET
614 U_TZSET();
615 #else
616 /* no initialization*/
617 #endif
618 }
619
620 U_CAPI int32_t U_EXPORT2
uprv_timezone()621 uprv_timezone()
622 {
623 #ifdef U_TIMEZONE
624 return U_TIMEZONE;
625 #else
626 time_t t, t1, t2;
627 struct tm tmrec;
628 #ifndef U_IOS
629 UBool dst_checked;
630 #endif
631 int32_t tdiff = 0;
632
633 time(&t);
634 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
635 #ifndef U_IOS
636 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
637 #endif
638 t1 = mktime(&tmrec); /* local time in seconds*/
639 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
640 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
641 tdiff = t2 - t1;
642 #ifndef U_IOS
643 /* On iOS the calculated tdiff is correct so and doesn't need this dst
644 shift applied. */
645 /* imitate NT behaviour, which returns same timezone offset to GMT for
646 winter and summer*/
647 if (dst_checked)
648 tdiff += 3600;
649 #endif
650 return tdiff;
651 #endif
652 }
653
654 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
655 some platforms need to have it declared here. */
656
657 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
658 /* RS6000 and others reject char **tzname. */
659 extern U_IMPORT char *U_TZNAME[];
660 #endif
661
662 #if !UCONFIG_NO_FILE_IO && ((defined(U_DARWIN) && !defined(U_IOS)) || defined(U_LINUX) || defined(U_BSD))
663 /* These platforms are likely to use Olson timezone IDs. */
664 #define CHECK_LOCALTIME_LINK 1
665 #if defined(U_DARWIN)
666 #include <tzfile.h>
667 #define TZZONEINFO (TZDIR "/")
668 #else
669 #define TZDEFAULT "/etc/localtime"
670 #define TZZONEINFO "/usr/share/zoneinfo/"
671 #endif
672 #if U_HAVE_DIRENT_H
673 #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
674 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
675 symlinked to /etc/localtime, which makes searchForTZFile return
676 'localtime' when it's the first match. */
677 #define TZFILE_SKIP2 "localtime"
678 #define SEARCH_TZFILE
679 #include <dirent.h> /* Needed to search through system timezone files */
680 #endif
681 static char gTimeZoneBuffer[PATH_MAX];
682 static char *gTimeZoneBufferPtr = NULL;
683 #endif
684
685 #ifndef U_WINDOWS
686 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)687 static UBool isValidOlsonID(const char *id) {
688 int32_t idx = 0;
689
690 /* Determine if this is something like Iceland (Olson ID)
691 or AST4ADT (non-Olson ID) */
692 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
693 idx++;
694 }
695
696 /* If we went through the whole string, then it might be okay.
697 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
698 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
699 The rest of the time it could be an Olson ID. George */
700 return (UBool)(id[idx] == 0
701 || uprv_strcmp(id, "PST8PDT") == 0
702 || uprv_strcmp(id, "MST7MDT") == 0
703 || uprv_strcmp(id, "CST6CDT") == 0
704 || uprv_strcmp(id, "EST5EDT") == 0);
705 }
706
/*
 * Advances *id past a leading "posix/" or "right/" component, if present.
 *
 * On some Unix-like systems the 'posix' subdirectory of /usr/share/zoneinfo
 * replicates the top-level contents, and 'right' holds the same set of
 * files built for TAI (Int'l Atomic Time) instead of UTC.  When the first
 * match for /etc/localtime is found in one of them (usually 'posix',
 * because the 'right' files have different contents), or when the TZ
 * environment variable points into one of them, createTimeZone fails
 * because, say, 'posix/America/New_York' is not an Olson timezone id
 * ('America/New_York' is).  So the prefix has to be skipped.
 */
static void skipZoneIDPrefix(const char** id) {
    const char *name = *id;

    if (uprv_strncmp(name, "posix/", 6) == 0
        || uprv_strncmp(name, "right/", 6) == 0)
    {
        *id = name + 6;
    }
}
726 #endif
727
728 #if defined(U_TZNAME) && !defined(U_WINDOWS)
729
/* Converts an offset in hours to seconds.  The argument is parenthesized so
   that an expression such as (a + b) is scaled as a whole. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset) * 3600))

/* One row of the table that maps a (UTC offset, daylight pattern,
   abbreviations) combination to an Olson time zone ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* UTC offset of the zone, in seconds */
    int32_t daylightType;   /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;      /* abbreviation for standard time */
    const char *dstID;      /* abbreviation for daylight time */
    const char *olsonID;    /* Olson ID this combination maps to */
} OffsetZoneMapping;
738
739 /*
740 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
741 and maps it to an Olson ID.
742 Before adding anything to this list, take a look at
743 icu/source/tools/tzcode/tz.alias
744 Sometimes no daylight savings (0) is important to define due to aliases.
745 This list can be tested with icu/source/test/compat/tzone.pl
746 More values could be added to daylightType to increase precision.
747 */
748 static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
749 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
750 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
751 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
752 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
753 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
754 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
755 {-36000, 2, "EST", "EST", "Australia/Sydney"},
756 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
757 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
758 {-34200, 2, "CST", "CST", "Australia/South"},
759 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
760 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
761 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
762 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
763 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
764 {-28800, 2, "WST", "WST", "Australia/West"},
765 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
766 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
767 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
768 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
769 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
770 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
771 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
772 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
773 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
774 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
775 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
776 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
777 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
778 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
779 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
780 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
781 {0, 1, "GMT", "IST", "Europe/Dublin"},
782 {0, 1, "GMT", "BST", "Europe/London"},
783 {0, 0, "WET", "WEST", "Africa/Casablanca"},
784 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
785 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
786 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
787 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
788 {10800, 2, "UYT", "UYST", "America/Montevideo"},
789 {10800, 1, "WGT", "WGST", "America/Godthab"},
790 {10800, 2, "BRT", "BRST", "Brazil/East"},
791 {12600, 1, "NST", "NDT", "America/St_Johns"},
792 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
793 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
794 {14400, 2, "CLT", "CLST", "Chile/Continental"},
795 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
796 {14400, 2, "PYT", "PYST", "America/Asuncion"},
797 {18000, 1, "CST", "CDT", "America/Havana"},
798 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
799 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
800 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
801 {21600, 0, "CST", "CDT", "America/Guatemala"},
802 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
803 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
804 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
805 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
806 {32400, 1, "AKST", "AKDT", "US/Alaska"},
807 {36000, 1, "HAST", "HADT", "US/Aleutian"}
808 };
809
810 /*#define DEBUG_TZNAME*/
811
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)812 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
813 {
814 int32_t idx;
815 #ifdef DEBUG_TZNAME
816 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
817 #endif
818 for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
819 {
820 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
821 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
822 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
823 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
824 {
825 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
826 }
827 }
828 return NULL;
829 }
830 #endif
831
832 #ifdef SEARCH_TZFILE
833 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
834 #define MAX_READ_SIZE 512
835
836 typedef struct DefaultTZInfo {
837 char* defaultTZBuffer;
838 int64_t defaultTZFileSize;
839 FILE* defaultTZFilePtr;
840 UBool defaultTZstatus;
841 int32_t defaultTZPosition;
842 } DefaultTZInfo;
843
844 /*
845 * This method compares the two files given to see if they are a match.
846 * It is currently use to compare two TZ files.
847 */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)848 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
849 FILE* file;
850 int64_t sizeFile;
851 int64_t sizeFileLeft;
852 int32_t sizeFileRead;
853 int32_t sizeFileToRead;
854 char bufferFile[MAX_READ_SIZE];
855 UBool result = TRUE;
856
857 if (tzInfo->defaultTZFilePtr == NULL) {
858 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
859 }
860 file = fopen(TZFileName, "r");
861
862 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
863
864 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
865 /* First check that the file size are equal. */
866 if (tzInfo->defaultTZFileSize == 0) {
867 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
868 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
869 }
870 fseek(file, 0, SEEK_END);
871 sizeFile = ftell(file);
872 sizeFileLeft = sizeFile;
873
874 if (sizeFile != tzInfo->defaultTZFileSize) {
875 result = FALSE;
876 } else {
877 /* Store the data from the files in seperate buffers and
878 * compare each byte to determine equality.
879 */
880 if (tzInfo->defaultTZBuffer == NULL) {
881 rewind(tzInfo->defaultTZFilePtr);
882 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
883 fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
884 }
885 rewind(file);
886 while(sizeFileLeft > 0) {
887 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
888 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
889
890 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
891 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
892 result = FALSE;
893 break;
894 }
895 sizeFileLeft -= sizeFileRead;
896 tzInfo->defaultTZPosition += sizeFileRead;
897 }
898 }
899 } else {
900 result = FALSE;
901 }
902
903 if (file != NULL) {
904 fclose(file);
905 }
906
907 return result;
908 }
909 /*
910 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
911 */
912 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
913 #define SKIP1 "."
914 #define SKIP2 ".."
915 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)916 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
917 char curpath[MAX_PATH_SIZE];
918 DIR* dirp = opendir(path);
919 DIR* subDirp = NULL;
920 struct dirent* dirEntry = NULL;
921
922 char* result = NULL;
923 if (dirp == NULL) {
924 return result;
925 }
926
927 /* Save the current path */
928 uprv_memset(curpath, 0, MAX_PATH_SIZE);
929 uprv_strcpy(curpath, path);
930
931 /* Check each entry in the directory. */
932 while((dirEntry = readdir(dirp)) != NULL) {
933 const char* dirName = dirEntry->d_name;
934 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
935 /* Create a newpath with the new entry to test each entry in the directory. */
936 char newpath[MAX_PATH_SIZE];
937 uprv_strcpy(newpath, curpath);
938 uprv_strcat(newpath, dirName);
939
940 if ((subDirp = opendir(newpath)) != NULL) {
941 /* If this new path is a directory, make a recursive call with the newpath. */
942 closedir(subDirp);
943 uprv_strcat(newpath, "/");
944 result = searchForTZFile(newpath, tzInfo);
945 /*
946 Have to get out here. Otherwise, we'd keep looking
947 and return the first match in the top-level directory
948 if there's a match in the top-level. If not, this function
949 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
950 It worked without this in most cases because we have a fallback of calling
951 localtime_r to figure out the default timezone.
952 */
953 if (result != NULL)
954 break;
955 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
956 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
957 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
958 skipZoneIDPrefix(&zoneid);
959 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
960 result = SEARCH_TZFILE_RESULT;
961 /* Get out after the first one found. */
962 break;
963 }
964 }
965 }
966 }
967 closedir(dirp);
968 return result;
969 }
970 #endif
971 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)972 uprv_tzname(int n)
973 {
974 const char *tzid = NULL;
975 #ifdef U_WINDOWS
976 tzid = uprv_detectWindowsTimeZone();
977
978 if (tzid != NULL) {
979 return tzid;
980 }
981 #else
982
983 /*#if defined(U_DARWIN)
984 int ret;
985
986 tzid = getenv("TZFILE");
987 if (tzid != NULL) {
988 return tzid;
989 }
990 #endif*/
991
992 /* This code can be temporarily disabled to test tzname resolution later on. */
993 #ifndef DEBUG_TZNAME
994 tzid = getenv("TZ");
995 if (tzid != NULL && isValidOlsonID(tzid))
996 {
997 /* This might be a good Olson ID. */
998 skipZoneIDPrefix(&tzid);
999 return tzid;
1000 }
1001 /* else U_TZNAME will give a better result. */
1002 #endif
1003
1004 #if defined(CHECK_LOCALTIME_LINK)
1005 /* Caller must handle threading issues */
1006 if (gTimeZoneBufferPtr == NULL) {
1007 /*
1008 This is a trick to look at the name of the link to get the Olson ID
1009 because the tzfile contents is underspecified.
1010 This isn't guaranteed to work because it may not be a symlink.
1011 */
1012 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1013 if (0 < ret) {
1014 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1015 gTimeZoneBuffer[ret] = 0;
1016 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1017 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1018 {
1019 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1020 }
1021 } else {
1022 #if defined(SEARCH_TZFILE)
1023 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1024 if (tzInfo != NULL) {
1025 tzInfo->defaultTZBuffer = NULL;
1026 tzInfo->defaultTZFileSize = 0;
1027 tzInfo->defaultTZFilePtr = NULL;
1028 tzInfo->defaultTZstatus = FALSE;
1029 tzInfo->defaultTZPosition = 0;
1030
1031 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1032
1033 /* Free previously allocated memory */
1034 if (tzInfo->defaultTZBuffer != NULL) {
1035 uprv_free(tzInfo->defaultTZBuffer);
1036 }
1037 if (tzInfo->defaultTZFilePtr != NULL) {
1038 fclose(tzInfo->defaultTZFilePtr);
1039 }
1040 uprv_free(tzInfo);
1041 }
1042
1043 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1044 return gTimeZoneBufferPtr;
1045 }
1046 #endif
1047 }
1048 }
1049 else {
1050 return gTimeZoneBufferPtr;
1051 }
1052 #endif
1053 #endif
1054
1055 #ifdef U_TZNAME
1056 #ifdef U_WINDOWS
1057 /* The return value is free'd in timezone.cpp on Windows because
1058 * the other code path returns a pointer to a heap location. */
1059 return uprv_strdup(U_TZNAME[n]);
1060 #else
1061 /*
1062 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1063 So we remap the abbreviation to an olson ID.
1064
1065 Since Windows exposes a little more timezone information,
1066 we normally don't use this code on Windows because
1067 uprv_detectWindowsTimeZone should have already given the correct answer.
1068 */
1069 {
1070 struct tm juneSol, decemberSol;
1071 int daylightType;
1072 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1073 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1074
1075 /* This probing will tell us when daylight savings occurs. */
1076 localtime_r(&juneSolstice, &juneSol);
1077 localtime_r(&decemberSolstice, &decemberSol);
1078 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1079 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1080 if (tzid != NULL) {
1081 return tzid;
1082 }
1083 }
1084 return U_TZNAME[n];
1085 #endif
1086 #else
1087 return "";
1088 #endif
1089 }
1090
1091 /* Get and set the ICU data directory --------------------------------------- */
1092
1093 static char *gDataDirectory = NULL;
1094 #if U_POSIX_LOCALE
1095 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1096 #endif
1097
putil_cleanup(void)1098 static UBool U_CALLCONV putil_cleanup(void)
1099 {
1100 if (gDataDirectory && *gDataDirectory) {
1101 uprv_free(gDataDirectory);
1102 }
1103 gDataDirectory = NULL;
1104 #if U_POSIX_LOCALE
1105 if (gCorrectedPOSIXLocale) {
1106 uprv_free(gCorrectedPOSIXLocale);
1107 gCorrectedPOSIXLocale = NULL;
1108 }
1109 #endif
1110 return TRUE;
1111 }
1112
1113 /*
1114 * Set the data directory.
1115 * Make a copy of the passed string, and set the global data dir to point to it.
1116 * TODO: see bug #2849, regarding thread safety.
1117 */
1118 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1119 u_setDataDirectory(const char *directory) {
1120 char *newDataDir;
1121 int32_t length;
1122
1123 if(directory==NULL || *directory==0) {
1124 /* A small optimization to prevent the malloc and copy when the
1125 shared library is used, and this is a way to make sure that NULL
1126 is never returned.
1127 */
1128 newDataDir = (char *)"";
1129 }
1130 else {
1131 length=(int32_t)uprv_strlen(directory);
1132 newDataDir = (char *)uprv_malloc(length + 2);
1133 /* Exit out if newDataDir could not be created. */
1134 if (newDataDir == NULL) {
1135 return;
1136 }
1137 uprv_strcpy(newDataDir, directory);
1138
1139 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1140 {
1141 char *p;
1142 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1143 *p = U_FILE_SEP_CHAR;
1144 }
1145 }
1146 #endif
1147 }
1148
1149 umtx_lock(NULL);
1150 if (gDataDirectory && *gDataDirectory) {
1151 uprv_free(gDataDirectory);
1152 }
1153 gDataDirectory = newDataDir;
1154 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1155 umtx_unlock(NULL);
1156 }
1157
1158 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1159 uprv_pathIsAbsolute(const char *path)
1160 {
1161 if(!path || !*path) {
1162 return FALSE;
1163 }
1164
1165 if(*path == U_FILE_SEP_CHAR) {
1166 return TRUE;
1167 }
1168
1169 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1170 if(*path == U_FILE_ALT_SEP_CHAR) {
1171 return TRUE;
1172 }
1173 #endif
1174
1175 #if defined(U_WINDOWS)
1176 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1177 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1178 path[1] == ':' ) {
1179 return TRUE;
1180 }
1181 #endif
1182
1183 return FALSE;
1184 }
1185
1186 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1187 until some client wrapper makefiles are updated */
1188 #if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1189 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1190 # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1191 # endif
1192 #endif
1193
1194 U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)1195 u_getDataDirectory(void) {
1196 const char *path = NULL;
1197 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1198 char datadir_path_buffer[PATH_MAX];
1199 #endif
1200
1201 /* if we have the directory, then return it immediately */
1202 UMTX_CHECK(NULL, gDataDirectory, path);
1203
1204 if(path) {
1205 return path;
1206 }
1207
1208 /*
1209 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1210 override ICU's data with the ICU_DATA environment variable. This prevents
1211 problems where multiple custom copies of ICU's specific version of data
1212 are installed on a system. Either the application must define the data
1213 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1214 ICU, set the data with udata_setCommonData or trust that all of the
1215 required data is contained in ICU's data library that contains
1216 the entry point defined by U_ICUDATA_ENTRY_POINT.
1217
1218 There may also be some platforms where environment variables
1219 are not allowed.
1220 */
1221 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1222 /* First try to get the environment variable */
1223 path=getenv("ICU_DATA");
1224 # endif
1225
1226 /* ICU_DATA_DIR may be set as a compile option.
1227 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1228 * and is used only when data is built in archive mode eliminating the need
1229 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1230 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1231 * set their own path.
1232 */
1233 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1234 if(path==NULL || *path==0) {
1235 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1236 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1237 # endif
1238 # ifdef ICU_DATA_DIR
1239 path=ICU_DATA_DIR;
1240 # else
1241 path=U_ICU_DATA_DEFAULT_DIR;
1242 # endif
1243 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1244 if (prefix != NULL) {
1245 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1246 path=datadir_path_buffer;
1247 }
1248 # endif
1249 }
1250 #endif
1251
1252 if(path==NULL) {
1253 /* It looks really bad, set it to something. */
1254 path = "";
1255 }
1256
1257 u_setDataDirectory(path);
1258 return gDataDirectory;
1259 }
1260
1261
1262
1263
1264
/* Macintosh-specific locale information ------------------------------------ */
#ifdef XP_MAC

/*
 * One row of the classic Mac OS locale lookup table. A field holding
 * MAC_LC_MAGIC_NUMBER matches any value when the table is searched.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;    /* POSIX locale ID selected by this row */
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER (-5)    /* wildcard used in table rows */
#define MAC_LC_INIT_NUMBER (-9)     /* "not yet determined" value for lookup keys */

/* A row that is selected by its date_region value alone. */
#define MAC_LC_REGION_ROW(dateRegion, id) \
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, (dateRegion), (id) }

/*
 * Searched front to back; the first matching row wins, so the catch-all
 * fallback has to stay last.
 */
static const mac_lc_rec mac_lc_recs[] = {
    MAC_LC_REGION_ROW( 0, "en_US"),   /* United States */
    MAC_LC_REGION_ROW( 1, "fr_FR"),   /* France */
    MAC_LC_REGION_ROW( 2, "en_GB"),   /* Great Britain */
    MAC_LC_REGION_ROW( 3, "de_DE"),   /* Germany */
    MAC_LC_REGION_ROW( 4, "it_IT"),   /* Italy */
    MAC_LC_REGION_ROW( 5, "nl_NL"),   /* Netherlands */
    MAC_LC_REGION_ROW( 6, "fr_BE"),   /* French for Belgium or Luxembourg */
    MAC_LC_REGION_ROW( 7, "sv_SE"),   /* Sweden */
    MAC_LC_REGION_ROW( 9, "da_DK"),   /* Denmark */
    MAC_LC_REGION_ROW(10, "pt_PT"),   /* Portugal */
    MAC_LC_REGION_ROW(11, "fr_CA"),   /* French Canada */
    MAC_LC_REGION_ROW(13, "he_IL"),   /* Israel (was "is_IS", which is Icelandic) */
    MAC_LC_REGION_ROW(14, "ja_JP"),   /* Japan */
    MAC_LC_REGION_ROW(15, "en_AU"),   /* Australia */
    MAC_LC_REGION_ROW(16, "ar_AE"),   /* the Arabic world (?) */
    MAC_LC_REGION_ROW(17, "fi_FI"),   /* Finland */
    MAC_LC_REGION_ROW(18, "fr_CH"),   /* French for Switzerland */
    MAC_LC_REGION_ROW(19, "de_CH"),   /* German for Switzerland */
    MAC_LC_REGION_ROW(20, "el_GR"),   /* Greece */
    MAC_LC_REGION_ROW(21, "is_IS"),   /* Iceland */
    /* 22 Malta and 23 Cyprus have no mapping yet. */
    MAC_LC_REGION_ROW(24, "tr_TR"),   /* Turkey */
    MAC_LC_REGION_ROW(25, "sh_YU"),   /* Croatian system for Yugoslavia */
    /* 33 Hindi system for India and 34 Pakistan have no mapping yet. */
    MAC_LC_REGION_ROW(41, "lt_LT"),   /* Lithuania */
    MAC_LC_REGION_ROW(42, "pl_PL"),   /* Poland */
    MAC_LC_REGION_ROW(43, "hu_HU"),   /* Hungary */
    MAC_LC_REGION_ROW(44, "et_EE"),   /* Estonia */
    MAC_LC_REGION_ROW(45, "lv_LV"),   /* Latvia */
    /* 46 Lapland [Ask Rich for the data. HS] and 47 Faeroe Islands have no mapping yet. */
    MAC_LC_REGION_ROW(48, "fa_IR"),   /* Iran */
    MAC_LC_REGION_ROW(49, "ru_RU"),   /* Russia */
    MAC_LC_REGION_ROW(50, "en_IE"),   /* Ireland */
    MAC_LC_REGION_ROW(51, "ko_KR"),   /* Korea */
    MAC_LC_REGION_ROW(52, "zh_CN"),   /* People's Republic of China */
    MAC_LC_REGION_ROW(53, "zh_TW"),   /* Taiwan */
    MAC_LC_REGION_ROW(54, "th_TH"),   /* Thailand */

    /* fallback is en_US */
    MAC_LC_REGION_ROW(MAC_LC_MAGIC_NUMBER, "en_US")
};

#undef MAC_LC_REGION_ROW

#endif
1369
1370 #if U_POSIX_LOCALE
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * category  LC_MESSAGES or LC_CTYPE; any other value yields the default.
 *
 * The value comes from setlocale(category, NULL). When that is missing or
 * names the C/POSIX locale, the environment variables LC_ALL, then the one
 * named after the category, then LANG are consulted. A variable that is set
 * to the empty string counts as unset, as POSIX specifies. If nothing usable
 * turns up the result is "en_US_POSIX"; the result is never NULL or empty.
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;
    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * On Solaris two different calls to setlocale can result in
        * different values. Only get this value once.
        *
        * We must check this first because an application can set this.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        posixID = setlocale(category, NULL);
        if ((posixID == 0)
            || (uprv_strcmp("C", posixID) == 0)
            || (uprv_strcmp("POSIX", posixID) == 0))
        {
            /* Maybe we got some garbage.  Try something more reasonable */
            posixID = getenv("LC_ALL");
            if (posixID == 0 || *posixID == 0) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if (posixID == 0 || *posixID == 0) {
                    posixID = getenv("LANG");
                }
            }
        }
    }
    if ((posixID==0)
        || (*posixID == 0)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
    }
    return posixID;
}
1421
/* Return just the POSIX id for the default locale, whatever happens to be in
 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
 * The lookup runs on the first call only; the answer is kept in a
 * function-local static and handed out again afterwards.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1433
/* Return just the POSIX id for the default codepage, whatever happens to be in
 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
 * The lookup runs on the first call only; the answer is kept in a
 * function-local static and handed out again afterwards.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1445 #endif
1446
1447 /* NOTE: The caller should handle thread safety */
1448 U_CAPI const char* U_EXPORT2
uprv_getDefaultLocaleID()1449 uprv_getDefaultLocaleID()
1450 {
1451 #if U_POSIX_LOCALE
1452 /*
1453 Note that: (a '!' means the ID is improper somehow)
1454 LC_ALL ----> default_loc codepage
1455 --------------------------------------------------------
1456 ab.CD ab CD
1457 ab@CD ab__CD -
1458 ab@CD.EF ab__CD EF
1459
1460 ab_CD.EF@GH ab_CD_GH EF
1461
1462 Some 'improper' ways to do the same as above:
1463 ! ab_CD@GH.EF ab_CD_GH EF
1464 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1465 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1466
1467 _CD@GH _CD_GH -
1468 _CD.EF@GH _CD_GH EF
1469
1470 The variant cannot have dots in it.
1471 The 'rightmost' variant (@xxx) wins.
1472 The leftmost codepage (.xxx) wins.
1473 */
1474 char *correctedPOSIXLocale = 0;
1475 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1476 const char *p;
1477 const char *q;
1478 int32_t len;
1479
1480 /* Format: (no spaces)
1481 ll [ _CC ] [ . MM ] [ @ VV]
1482
1483 l = lang, C = ctry, M = charmap, V = variant
1484 */
1485
1486 if (gCorrectedPOSIXLocale != NULL) {
1487 return gCorrectedPOSIXLocale;
1488 }
1489
1490 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1491 /* assume new locale can't be larger than old one? */
1492 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1493 /* Exit on memory allocation error. */
1494 if (correctedPOSIXLocale == NULL) {
1495 return NULL;
1496 }
1497 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1498 correctedPOSIXLocale[p-posixID] = 0;
1499
1500 /* do not copy after the @ */
1501 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1502 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1503 }
1504 }
1505
1506 /* Note that we scan the *uncorrected* ID. */
1507 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1508 if (correctedPOSIXLocale == NULL) {
1509 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1510 /* Exit on memory allocation error. */
1511 if (correctedPOSIXLocale == NULL) {
1512 return NULL;
1513 }
1514 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1515 correctedPOSIXLocale[p-posixID] = 0;
1516 }
1517 p++;
1518
1519 /* Take care of any special cases here.. */
1520 if (!uprv_strcmp(p, "nynorsk")) {
1521 p = "NY";
1522 /* Don't worry about no__NY. In practice, it won't appear. */
1523 }
1524
1525 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1526 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1527 }
1528 else {
1529 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1530 }
1531
1532 if ((q = uprv_strchr(p, '.')) != NULL) {
1533 /* How big will the resulting string be? */
1534 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1535 uprv_strncat(correctedPOSIXLocale, p, q-p);
1536 correctedPOSIXLocale[len] = 0;
1537 }
1538 else {
1539 /* Anything following the @ sign */
1540 uprv_strcat(correctedPOSIXLocale, p);
1541 }
1542
1543 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1544 * How about 'russian' -> 'ru'?
1545 * Many of the other locales using ISO codes will be handled by the
1546 * canonicalization functions in uloc_getDefault.
1547 */
1548 }
1549
1550 /* Was a correction made? */
1551 if (correctedPOSIXLocale != NULL) {
1552 posixID = correctedPOSIXLocale;
1553 }
1554 else {
1555 /* copy it, just in case the original pointer goes away. See j2395 */
1556 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1557 /* Exit on memory allocation error. */
1558 if (correctedPOSIXLocale == NULL) {
1559 return NULL;
1560 }
1561 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1562 }
1563
1564 if (gCorrectedPOSIXLocale == NULL) {
1565 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1566 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1567 correctedPOSIXLocale = NULL;
1568 }
1569
1570 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1571 uprv_free(correctedPOSIXLocale);
1572 }
1573
1574 return posixID;
1575
1576 #elif defined(U_WINDOWS)
1577 UErrorCode status = U_ZERO_ERROR;
1578 LCID id = GetThreadLocale();
1579 const char* locID = uprv_convertToPosix(id, &status);
1580
1581 if (U_FAILURE(status)) {
1582 locID = "en_US";
1583 }
1584 return locID;
1585
1586 #elif defined(XP_MAC)
1587 int32_t script = MAC_LC_INIT_NUMBER;
1588 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1589 int32_t region = MAC_LC_INIT_NUMBER;
1590 /* = GetScriptManagerVariable(smRegionCode);*/
1591 int32_t lang = MAC_LC_INIT_NUMBER;
1592 /* = GetScriptManagerVariable(smScriptLang);*/
1593 int32_t date_region = MAC_LC_INIT_NUMBER;
1594 const char* posixID = 0;
1595 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1596 int32_t i;
1597 Intl1Hndl ih;
1598
1599 ih = (Intl1Hndl) GetIntlResource(1);
1600 if (ih)
1601 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1602
1603 for (i = 0; i < count; i++) {
1604 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1605 || (mac_lc_recs[i].script == script))
1606 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1607 || (mac_lc_recs[i].region == region))
1608 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1609 || (mac_lc_recs[i].lang == lang))
1610 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1611 || (mac_lc_recs[i].date_region == date_region))
1612 )
1613 {
1614 posixID = mac_lc_recs[i].posixID;
1615 break;
1616 }
1617 }
1618
1619 return posixID;
1620
1621 #elif defined(OS400)
1622 /* locales are process scoped and are by definition thread safe */
1623 static char correctedLocale[64];
1624 const char *localeID = getenv("LC_ALL");
1625 char *p;
1626
1627 if (localeID == NULL)
1628 localeID = getenv("LANG");
1629 if (localeID == NULL)
1630 localeID = setlocale(LC_ALL, NULL);
1631 /* Make sure we have something... */
1632 if (localeID == NULL)
1633 return "en_US_POSIX";
1634
1635 /* Extract the locale name from the path. */
1636 if((p = uprv_strrchr(localeID, '/')) != NULL)
1637 {
1638 /* Increment p to start of locale name. */
1639 p++;
1640 localeID = p;
1641 }
1642
1643 /* Copy to work location. */
1644 uprv_strcpy(correctedLocale, localeID);
1645
1646 /* Strip off the '.locale' extension. */
1647 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1648 *p = 0;
1649 }
1650
1651 /* Upper case the locale name. */
1652 T_CString_toUpperCase(correctedLocale);
1653
1654 /* See if we are using the POSIX locale. Any of the
1655 * following are equivalent and use the same QLGPGCMA
1656 * (POSIX) locale.
1657 * QLGPGCMA2 means UCS2
1658 * QLGPGCMA_4 means UTF-32
1659 * QLGPGCMA_8 means UTF-8
1660 */
1661 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1662 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1663 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1664 {
1665 uprv_strcpy(correctedLocale, "en_US_POSIX");
1666 }
1667 else
1668 {
1669 int16_t LocaleLen;
1670
1671 /* Lower case the lang portion. */
1672 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1673 {
1674 *p = uprv_tolower(*p);
1675 }
1676
1677 /* Adjust for Euro. After '_E' add 'URO'. */
1678 LocaleLen = uprv_strlen(correctedLocale);
1679 if (correctedLocale[LocaleLen - 2] == '_' &&
1680 correctedLocale[LocaleLen - 1] == 'E')
1681 {
1682 uprv_strcat(correctedLocale, "URO");
1683 }
1684
1685 /* If using Lotus-based locale then convert to
1686 * equivalent non Lotus.
1687 */
1688 else if (correctedLocale[LocaleLen - 2] == '_' &&
1689 correctedLocale[LocaleLen - 1] == 'L')
1690 {
1691 correctedLocale[LocaleLen - 2] = 0;
1692 }
1693
1694 /* There are separate simplified and traditional
1695 * locales called zh_HK_S and zh_HK_T.
1696 */
1697 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1698 {
1699 uprv_strcpy(correctedLocale, "zh_HK");
1700 }
1701
1702 /* A special zh_CN_GBK locale...
1703 */
1704 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1705 {
1706 uprv_strcpy(correctedLocale, "zh_CN");
1707 }
1708
1709 }
1710
1711 return correctedLocale;
1712 #endif
1713
1714 }
1715
1716 #if !U_CHARSET_IS_UTF8
1717 #if U_POSIX_LOCALE
/*
Due to various platform differences, one platform may specify a charset,
when they really mean a different charset. Remap the names so that they are
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
here. Before adding anything to this function, please consider adding unique
names to the ICU alias table in the data directory.

locale  Locale the charset name belongs to; NULL and "" both mean "none".
name    Charset name as reported by the platform.

Returns the charset name to use: a replacement literal when a platform rule
applies, otherwise name itself. NULL is returned when name is NULL or empty
and no rule supplied a replacement.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (name == NULL) {
        return NULL;
    }
    /* Make sure that an empty locale is handled the same way. */
    if (locale != NULL && locale[0] == 0) {
        locale = NULL;
    }

#if defined(U_AIX)
    /* Use the ASCII compatible ibm-943 */
    if (!uprv_strcmp(name, "IBM-943")) {
        return "Shift-JIS";
    }
    /* Use the windows-1252 that contains the Euro */
    if (!uprv_strcmp(name, "IBM-1252")) {
        return "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && !uprv_strcmp(name, "EUC")) {
        /* Solaris underspecifies the "EUC" name. */
        if (!uprv_strcmp(locale, "zh_CN")) {
            return "EUC-CN";
        }
        if (!uprv_strcmp(locale, "zh_TW")) {
            return "EUC-TW";
        }
        if (!uprv_strcmp(locale, "ko_KR")) {
            return "EUC-KR";
        }
    }
    else if (!uprv_strcmp(name, "eucJP")) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    else if (!uprv_strcmp(name, "646")) {
        /*
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
         * ISO-8859-1 instead of US-ASCII(646).
         */
        return "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        return "UTF-8";
    }
    if (!uprv_strcmp(name, "CP949")) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
    if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && !uprv_strcmp(name, "US-ASCII")) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        return "UTF-8";
    }
#elif defined(U_BSD)
    if (!uprv_strcmp(name, "CP949")) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
#elif defined(U_HPUX)
    if (locale != NULL && !uprv_strcmp(locale, "zh_HK") && !uprv_strcmp(name, "big5")) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        return "hkbig5";
    }
    if (!uprv_strcmp(name, "eucJP")) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && !uprv_strcmp(name, "euc")) {
        /* Linux underspecifies the "EUC" name. */
        if (!uprv_strcmp(locale, "korean")) {
            return "EUC-KR";
        }
        if (!uprv_strcmp(locale, "japanese")) {
            /* See comment below about eucJP */
            return "eucjis";
        }
    }
    else if (!uprv_strcmp(name, "eucjp")) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (!uprv_strcmp(name, "ANSI_X3.4-1968") || !uprv_strcmp(name, "US-ASCII"))) {
        /*
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
         */
        return "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif

    /* return NULL when "" is passed in */
    return (name[0] == 0) ? NULL : name;
}
1846
/*
 * Extracts the codepage part of a POSIX locale ID ("ll_CC.codepage@variant").
 *
 * localeName    POSIX locale ID; may be NULL.
 * buffer        Receives the text after the first '.', cut at any '@'.
 * buffCapacity  Size of buffer in bytes.
 *
 * Returns NULL when localeName is NULL or contains no '.'. Otherwise returns
 * whatever remapPlatformDependentCodepage() makes of the extracted codepage
 * and the part of the ID in front of the '.'.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *dot;
    const char *codepage;
    char *variant;
    size_t localeCapacity;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        return NULL;
    }

    /* Keep the locale part (everything before the '.') for the remapping rules. */
    localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

    /* Copy what follows the '.' and drop a trailing "@variant". */
    codepage = uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */
    variant = uprv_strchr(codepage, '@');
    if (variant != NULL) {
        *variant = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, codepage);
}
1867 #endif
1868
/*
 * Works out the name of the platform's default codepage. Exactly one of the
 * preprocessor branches below is compiled:
 *
 *   OS400          "ibm-<ccsid>" built from the job information returned by
 *                  QUSRJOBI (CCSID 37 when nothing usable is reported).
 *   OS390          nl_langinfo(CODESET) with UCNV_SWAP_LFNL_OPTION_STRING
 *                  appended.
 *   XP_MAC         the fixed name "macintosh".
 *   U_WINDOWS      "windows-<GetACP()>".
 *   U_POSIX_LOCALE nl_langinfo() when available, otherwise the codeset part
 *                  of the POSIX locale ID, otherwise "US-ASCII".
 *   otherwise      the fixed name "US-ASCII".
 *
 * The result points either at a string literal or at a function-local static
 * buffer, so the caller must not free it.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
    uint32_t ccsid = 37; /* Default to ibm-37 */

    /* NOTE(review): confirm that the two blank string arguments carry the
       padding QUSRJOBI expects. */
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
                       "* ", " ", &error);

    if (error.Bytes_Available == 0) {
        /* Prefer the job CCSID, then the job's default CCSID; a value of
           0xFFFF is skipped in both cases. */
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* otherwise keep the preset default */
    }
    sprintf(codepage,"ibm-%d", ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];

    /* Leave room for the option suffix. strncpy does not terminate a
       truncated copy, so strcat relies on the static buffer still being
       zero-filled past the copied characters. */
    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[63] = 0; /* NULL terminate */

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];

    /* Name the converter after the active ANSI code page. */
    sprintf(codepage, "windows-%d", GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = uprv_getPOSIXIDForDefaultCodepage();
    const char *fromLocaleID = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* Ask nl_langinfo first because it usually gives more useful names.
       It depends on LC_CTYPE and may share its buffer with setlocale, which
       is why the result is copied before returning. */
    {
        const char *remapLocale = NULL;
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX the locale is passed to the remapping step for
         * every locale except en_US_POSIX, so that a non-C/POSIX locale ends
         * up with UTF-8 instead of ASCII.
         */
        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            remapLocale = localeName;
        }
#endif
        codeset = remapPlatformDependentCodepage(remapLocale, codeset);

        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Next, look for a codeset inside the POSIX locale ID itself
       (the application may have called setlocale already). */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    fromLocaleID = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (fromLocaleID != NULL) {
        return fromLocaleID;
    }

    if (*codesetName == 0)
    {
        /* Nothing worked. Fall back to US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1964
1965
1966 U_CAPI const char* U_EXPORT2
uprv_getDefaultCodepage()1967 uprv_getDefaultCodepage()
1968 {
1969 static char const *name = NULL;
1970 umtx_lock(NULL);
1971 if (name == NULL) {
1972 name = int_getDefaultCodepage();
1973 }
1974 umtx_unlock(NULL);
1975 return name;
1976 }
1977 #endif /* !U_CHARSET_IS_UTF8 */
1978
1979
1980 /* end of platform-specific implementation -------------- */
1981
1982 /* version handling --------------------------------------------------------- */
1983
1984 U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray,const char * versionString)1985 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1986 char *end;
1987 uint16_t part=0;
1988
1989 if(versionArray==NULL) {
1990 return;
1991 }
1992
1993 if(versionString!=NULL) {
1994 for(;;) {
1995 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1996 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1997 break;
1998 }
1999 versionString=end+1;
2000 }
2001 }
2002
2003 while(part<U_MAX_VERSION_LENGTH) {
2004 versionArray[part++]=0;
2005 }
2006 }
2007
2008 U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray,const UChar * versionString)2009 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2010 if(versionArray!=NULL && versionString!=NULL) {
2011 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2012 int32_t len = u_strlen(versionString);
2013 if(len>U_MAX_VERSION_STRING_LENGTH) {
2014 len = U_MAX_VERSION_STRING_LENGTH;
2015 }
2016 u_UCharsToChars(versionString, versionChars, len);
2017 versionChars[len]=0;
2018 u_versionFromString(versionArray, versionChars);
2019 }
2020 }
2021
2022 U_CAPI void U_EXPORT2
u_versionToString(UVersionInfo versionArray,char * versionString)2023 u_versionToString(UVersionInfo versionArray, char *versionString) {
2024 uint16_t count, part;
2025 uint8_t field;
2026
2027 if(versionString==NULL) {
2028 return;
2029 }
2030
2031 if(versionArray==NULL) {
2032 versionString[0]=0;
2033 return;
2034 }
2035
2036 /* count how many fields need to be written */
2037 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2038 }
2039
2040 if(count <= 1) {
2041 count = 2;
2042 }
2043
2044 /* write the first part */
2045 /* write the decimal field value */
2046 field=versionArray[0];
2047 if(field>=100) {
2048 *versionString++=(char)('0'+field/100);
2049 field%=100;
2050 }
2051 if(field>=10) {
2052 *versionString++=(char)('0'+field/10);
2053 field%=10;
2054 }
2055 *versionString++=(char)('0'+field);
2056
2057 /* write the following parts */
2058 for(part=1; part<count; ++part) {
2059 /* write a dot first */
2060 *versionString++=U_VERSION_DELIMITER;
2061
2062 /* write the decimal field value */
2063 field=versionArray[part];
2064 if(field>=100) {
2065 *versionString++=(char)('0'+field/100);
2066 field%=100;
2067 }
2068 if(field>=10) {
2069 *versionString++=(char)('0'+field/10);
2070 field%=10;
2071 }
2072 *versionString++=(char)('0'+field);
2073 }
2074
2075 /* NUL-terminate */
2076 *versionString=0;
2077 }
2078
2079 U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray)2080 u_getVersion(UVersionInfo versionArray) {
2081 u_versionFromString(versionArray, U_ICU_VERSION);
2082 }
2083
2084 /**
2085 * icucfg.h dependent code
2086 */
2087
2088 #if U_ENABLE_DYLOAD
2089
2090 #if defined(U_CHECK_DYLOAD)
2091
2092 #if defined(HAVE_DLOPEN)
2093
2094 #ifdef HAVE_DLFCN_H
2095 #ifdef __MVS__
2096 #ifndef __SUSV3
2097 #define __SUSV3 1
2098 #endif
2099 #endif
2100 #include <dlfcn.h>
2101 #endif
2102
2103 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2104 uprv_dl_open(const char *libName, UErrorCode *status) {
2105 void *ret = NULL;
2106 if(U_FAILURE(*status)) return ret;
2107 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2108 if(ret==NULL) {
2109 #ifndef U_TRACE_DYLOAD
2110 perror("dlopen");
2111 #endif
2112 *status = U_MISSING_RESOURCE_ERROR;
2113 }
2114 return ret;
2115 }
2116
2117 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2118 uprv_dl_close(void *lib, UErrorCode *status) {
2119 if(U_FAILURE(*status)) return;
2120 dlclose(lib);
2121 }
2122
2123 U_INTERNAL void* U_EXPORT2
uprv_dl_sym(void * lib,const char * sym,UErrorCode * status)2124 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2125 void *ret = NULL;
2126 if(U_FAILURE(*status)) return ret;
2127 ret = dlsym(lib, sym);
2128 if(ret == NULL) {
2129 *status = U_MISSING_RESOURCE_ERROR;
2130 }
2131 return ret;
2132 }
2133
2134 #else
2135
2136 /* null (nonexistent) implementation. */
2137
2138 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2139 uprv_dl_open(const char *libName, UErrorCode *status) {
2140 if(U_FAILURE(*status)) return NULL;
2141 *status = U_UNSUPPORTED_ERROR;
2142 return NULL;
2143 }
2144
2145 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2146 uprv_dl_close(void *lib, UErrorCode *status) {
2147 if(U_FAILURE(*status)) return;
2148 *status = U_UNSUPPORTED_ERROR;
2149 return;
2150 }
2151
2152
2153 U_INTERNAL void* U_EXPORT2
uprv_dl_sym(void * lib,const char * sym,UErrorCode * status)2154 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2155 if(U_FAILURE(*status)) return NULL;
2156 *status = U_UNSUPPORTED_ERROR;
2157 return NULL;
2158 }
2159
2160
2161
2162 #endif
2163
2164 #elif defined U_WINDOWS
2165
2166 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2167 uprv_dl_open(const char *libName, UErrorCode *status) {
2168 HMODULE lib = NULL;
2169
2170 if(U_FAILURE(*status)) return NULL;
2171
2172 lib = LoadLibraryA(libName);
2173
2174 if(lib==NULL) {
2175 *status = U_MISSING_RESOURCE_ERROR;
2176 }
2177
2178 return (void*)lib;
2179 }
2180
2181 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2182 uprv_dl_close(void *lib, UErrorCode *status) {
2183 HMODULE handle = (HMODULE)lib;
2184 if(U_FAILURE(*status)) return;
2185
2186 FreeLibrary(handle);
2187
2188 return;
2189 }
2190
2191
2192 U_INTERNAL void* U_EXPORT2
uprv_dl_sym(void * lib,const char * sym,UErrorCode * status)2193 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2194 HMODULE handle = (HMODULE)lib;
2195 void * addr = NULL;
2196
2197 if(U_FAILURE(*status) || lib==NULL) return NULL;
2198
2199 addr = GetProcAddress(handle, sym);
2200
2201 if(addr==NULL) {
2202 DWORD lastError = GetLastError();
2203 if(lastError == ERROR_PROC_NOT_FOUND) {
2204 *status = U_MISSING_RESOURCE_ERROR;
2205 } else {
2206 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2207 }
2208 }
2209
2210 return addr;
2211 }
2212
2213
2214 #else
2215
2216 /* No dynamic loading set. */
2217
2218 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2219 uprv_dl_open(const char *libName, UErrorCode *status) {
2220 if(U_FAILURE(*status)) return NULL;
2221 *status = U_UNSUPPORTED_ERROR;
2222 return NULL;
2223 }
2224
2225 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2226 uprv_dl_close(void *lib, UErrorCode *status) {
2227 if(U_FAILURE(*status)) return;
2228 *status = U_UNSUPPORTED_ERROR;
2229 return;
2230 }
2231
2232
2233 U_INTERNAL void* U_EXPORT2
uprv_dl_sym(void * lib,const char * sym,UErrorCode * status)2234 uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2235 if(U_FAILURE(*status)) return NULL;
2236 *status = U_UNSUPPORTED_ERROR;
2237 return NULL;
2238 }
2239
2240
2241 #endif
2242
2243 #endif /* U_ENABLE_DYLOAD */
2244
2245 /*
2246 * Hey, Emacs, please set the following:
2247 *
2248 * Local Variables:
2249 * indent-tabs-mode: nil
2250 * End:
2251 *
2252 */
2253