• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1997-2007, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 *   Date        Name        Description
12 *   04/14/97    aliu        Creation.
13 *   04/24/97    aliu        Added getDefaultDataDirectory() and
14 *                            getDefaultLocaleID().
15 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
16 *                            for assumed case.  Non-UNIX platforms must be
17 *                            special-cased.  Rewrote numeric methods dealing
18 *                            with NaN and Infinity to be platform independent
19 *                             over all IEEE 754 platforms.
20 *   05/13/97    aliu        Restored sign of timezone
21 *                            (semantics are hours West of GMT)
22 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 *                             nextDouble..
24 *   07/22/98    stephen     Added remainder, max, min, trunc
25 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
26 *   08/24/98    stephen     Added longBitsFromDouble
27 *   09/08/98    stephen     Minor changes for Mac Port
28 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
29 *                            Fixed EBCDIC tables
30 *   04/15/99    stephen     Converted to C.
31 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
32 *   08/04/99    jeffrey R.  Added OS/2 changes
33 *   11/15/99    helena      Integrated S/390 IEEE support.
34 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
35 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
36 ******************************************************************************
37 */
38 
39 /* Define _XOPEN_SOURCE for Solaris and friends. */
40 /* NetBSD needs it to be >= 4 */
41 #if !defined(_XOPEN_SOURCE)
42 #if __STDC_VERSION__ >= 199901L
43 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
44 #define _XOPEN_SOURCE 600
45 #else
46 #define _XOPEN_SOURCE 4
47 #endif
48 #endif
49 
50 /* Make sure things like readlink and such functions work.
51 Poorly upgraded Solaris machines can't have this defined.
52 Cleanly installed Solaris can use this #define.
53 */
54 #if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L)
55 #define _XOPEN_SOURCE_EXTENDED 1
56 #endif
57 
58 /* include ICU headers */
59 #include "unicode/utypes.h"
60 #include "unicode/putil.h"
61 #include "unicode/ustring.h"
62 #include "putilimp.h"
63 #include "uassert.h"
64 #include "umutex.h"
65 #include "cmemory.h"
66 #include "cstring.h"
67 #include "locmap.h"
68 #include "ucln_cmn.h"
69 
70 /* Include standard headers. */
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <math.h>
75 #include <locale.h>
76 #include <float.h>
77 #include <time.h>
78 
79 /* include system headers */
80 #ifdef U_WINDOWS
81 #   define WIN32_LEAN_AND_MEAN
82 #   define VC_EXTRALEAN
83 #   define NOUSER
84 #   define NOSERVICE
85 #   define NOIME
86 #   define NOMCX
87 #   include <windows.h>
88 #   include "wintz.h"
89 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
90 /* tzset isn't defined in strict ANSI on Cygwin. */
91 #   undef __STRICT_ANSI__
92 #elif defined(OS400)
93 #   include <float.h>
94 #   include <qusec.h>       /* error code structure */
95 #   include <qusrjobi.h>
96 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
97 #   include <mih/testptr.h> /* For uprv_maximumPtr */
98 #elif defined(XP_MAC)
99 #   include <Files.h>
100 #   include <IntlResources.h>
101 #   include <Script.h>
102 #   include <Folders.h>
103 #   include <MacTypes.h>
104 #   include <TextUtils.h>
105 #   define ICU_NO_USER_DATA_OVERRIDE 1
106 #elif defined(OS390)
107 #include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
108 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
109 #include <limits.h>
110 #include <unistd.h>
111 #elif defined(U_QNX)
112 #include <sys/neutrino.h>
113 #endif
114 
115 #ifndef U_WINDOWS
116 #include <sys/time.h>
117 #endif
118 
119 /*
120  * Only include langinfo.h if we have a way to get the codeset. If we later
121  * depend on more feature, we can test on U_HAVE_NL_LANGINFO.
122  *
123  */
124 
125 #if U_HAVE_NL_LANGINFO_CODESET
126 #include <langinfo.h>
127 #endif
128 
129 /* Define the extension for data files, again... */
130 #define DATA_TYPE "dat"
131 
132 /* Leave this copyright notice here! */
133 static const char copyright[] = U_COPYRIGHT_STRING;
134 
135 /* floating point implementations ------------------------------------------- */
136 
137 /* We return QNAN rather than SNAN*/
138 #define SIGN 0x80000000U
139 
140 /* Make it easy to define certain types of constants */
/*
 * Overlay of a 64-bit integer and a double, used to build doubles from raw
 * bit patterns and to inspect the bits of a double.
 */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;  /* The same 64 bits viewed as a double. */
} BitPatternConversion;
/* Bit pattern used as the NaN value handed out by this file. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity; also used as the comparison threshold in the IEEE 754 checks. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
147 
148 /*---------------------------------------------------------------------------
149   Platform utilities
150   Our general strategy is to assume we're on a POSIX platform.  Platforms which
151   are non-POSIX must declare themselves so.  The default POSIX implementation
152   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
153   functions).
154   ---------------------------------------------------------------------------*/
155 
156 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
157 #   undef U_POSIX_LOCALE
158 #else
159 #   define U_POSIX_LOCALE    1
160 #endif
161 
162 /*
163     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
164     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
165 */
#if !IEEE_754
/*
 * Returns a pointer to the n bytes at the "top" of *d: the start of the
 * object when U_IS_BIG_ENDIAN is set, otherwise its last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    /* The top bytes are stored first. */
    return firstByte;
#else
    /* The top bytes are stored last: step back n bytes from the end. */
    return firstByte + sizeof(double) - n;
#endif
}
#endif
177 
/*
 * Returns a pointer to the n bytes at the "bottom" of *d: the last n bytes
 * of the object when U_IS_BIG_ENDIAN is set, otherwise its start.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    /* The bottom bytes are stored last: step back n bytes from the end. */
    return firstByte + sizeof(double) - n;
#else
    /* The bottom bytes are stored first. */
    return firstByte;
#endif
}
187 
#if defined(U_WINDOWS)
/*
 * Overlay that lets a FILETIME be read back as a single 64-bit integer.
 */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000

#endif
199 
200 /*---------------------------------------------------------------------------
201   Universal Implementations
202   These are designed to work on all platforms.  Try these, and if they
203   don't work on your platform, then special case your platform with new
204   implementations.
205 ---------------------------------------------------------------------------*/
206 
207 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
208 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()209 uprv_getUTCtime()
210 {
211 #ifdef XP_MAC
212     time_t t, t1, t2;
213     struct tm tmrec;
214 
215     uprv_memset( &tmrec, 0, sizeof(tmrec) );
216     tmrec.tm_year = 70;
217     tmrec.tm_mon = 0;
218     tmrec.tm_mday = 1;
219     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
220 
221     time(&t);
222     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
223     t2 = mktime(&tmrec);    /* seconds of current GMT*/
224     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
225 #elif defined(U_WINDOWS)
226 
227     FileTimeConversion winTime;
228     GetSystemTimeAsFileTime(&winTime.fileTime);
229     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
230 #else
231 /*
232     struct timeval posixTime;
233     gettimeofday(&posixTime, NULL);
234     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
235 */
236     time_t epochtime;
237     time(&epochtime);
238     return (UDate)epochtime * U_MILLIS_PER_SECOND;
239 #endif
240 }
241 
242 /*-----------------------------------------------------------------------------
243   IEEE 754
244   These methods detect and return NaN and infinity values for doubles
245   conforming to IEEE 754.  Platforms which support this standard include X86,
246   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
247   If this doesn't work on your platform, you have non-IEEE floating-point, and
248   will need to code your own versions.  A naive implementation is to return 0.0
249   for getNaN and getInfinity, and false for isNaN and isInfinite.
250   ---------------------------------------------------------------------------*/
251 
252 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)253 uprv_isNaN(double number)
254 {
255 #if IEEE_754
256     BitPatternConversion convertedNumber;
257     convertedNumber.d64 = number;
258     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
259     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
260 
261 #elif defined(OS390)
262     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
263                         sizeof(uint32_t));
264     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
265                         sizeof(uint32_t));
266 
267     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
268       (lowBits == 0x00000000L);
269 
270 #else
271     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
272     /* you'll need to replace this default implementation with what's correct*/
273     /* for your platform.*/
274     return number != number;
275 #endif
276 }
277 
278 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)279 uprv_isInfinite(double number)
280 {
281 #if IEEE_754
282     BitPatternConversion convertedNumber;
283     convertedNumber.d64 = number;
284     /* Infinity is exactly 0x7FF0000000000000U. */
285     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
286 #elif defined(OS390)
287     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
288                         sizeof(uint32_t));
289     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
290                         sizeof(uint32_t));
291 
292     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
293 
294 #else
295     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
296     /* value, you'll need to replace this default implementation with what's*/
297     /* correct for your platform.*/
298     return number == (2.0 * number);
299 #endif
300 }
301 
302 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)303 uprv_isPositiveInfinity(double number)
304 {
305 #if IEEE_754 || defined(OS390)
306     return (UBool)(number > 0 && uprv_isInfinite(number));
307 #else
308     return uprv_isInfinite(number);
309 #endif
310 }
311 
312 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)313 uprv_isNegativeInfinity(double number)
314 {
315 #if IEEE_754 || defined(OS390)
316     return (UBool)(number < 0 && uprv_isInfinite(number));
317 
318 #else
319     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
320                         sizeof(uint32_t));
321     return((highBits & SIGN) && uprv_isInfinite(number));
322 
323 #endif
324 }
325 
326 U_CAPI double U_EXPORT2
uprv_getNaN()327 uprv_getNaN()
328 {
329 #if IEEE_754 || defined(OS390)
330     return gNan.d64;
331 #else
332     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
333     /* you'll need to replace this default implementation with what's correct*/
334     /* for your platform.*/
335     return 0.0;
336 #endif
337 }
338 
339 U_CAPI double U_EXPORT2
uprv_getInfinity()340 uprv_getInfinity()
341 {
342 #if IEEE_754 || defined(OS390)
343     return gInf.d64;
344 #else
345     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
346     /* value, you'll need to replace this default implementation with what's*/
347     /* correct for your platform.*/
348     return 0.0;
349 #endif
350 }
351 
352 U_CAPI double U_EXPORT2
uprv_floor(double x)353 uprv_floor(double x)
354 {
355     return floor(x);
356 }
357 
358 U_CAPI double U_EXPORT2
uprv_ceil(double x)359 uprv_ceil(double x)
360 {
361     return ceil(x);
362 }
363 
364 U_CAPI double U_EXPORT2
uprv_round(double x)365 uprv_round(double x)
366 {
367     return uprv_floor(x + 0.5);
368 }
369 
370 U_CAPI double U_EXPORT2
uprv_fabs(double x)371 uprv_fabs(double x)
372 {
373     return fabs(x);
374 }
375 
376 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)377 uprv_modf(double x, double* y)
378 {
379     return modf(x, y);
380 }
381 
382 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)383 uprv_fmod(double x, double y)
384 {
385     return fmod(x, y);
386 }
387 
388 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)389 uprv_pow(double x, double y)
390 {
391     /* This is declared as "double pow(double x, double y)" */
392     return pow(x, y);
393 }
394 
395 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)396 uprv_pow10(int32_t x)
397 {
398     return pow(10.0, (double)x);
399 }
400 
401 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)402 uprv_fmax(double x, double y)
403 {
404 #if IEEE_754
405     int32_t lowBits;
406 
407     /* first handle NaN*/
408     if(uprv_isNaN(x) || uprv_isNaN(y))
409         return uprv_getNaN();
410 
411     /* check for -0 and 0*/
412     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
413     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
414         return y;
415 
416 #endif
417 
418     /* this should work for all flt point w/o NaN and Infpecial cases */
419     return (x > y ? x : y);
420 }
421 
422 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)423 uprv_fmin(double x, double y)
424 {
425 #if IEEE_754
426     int32_t lowBits;
427 
428     /* first handle NaN*/
429     if(uprv_isNaN(x) || uprv_isNaN(y))
430         return uprv_getNaN();
431 
432     /* check for -0 and 0*/
433     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
434     if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
435         return y;
436 
437 #endif
438 
439     /* this should work for all flt point w/o NaN and Inf special cases */
440     return (x > y ? y : x);
441 }
442 
443 /**
444  * Truncates the given double.
445  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
446  * This is different than calling floor() or ceil():
447  * floor(3.3) = 3, floor(-3.3) = -4
448  * ceil(3.3) = 4, ceil(-3.3) = -3
449  */
450 U_CAPI double U_EXPORT2
uprv_trunc(double d)451 uprv_trunc(double d)
452 {
453 #if IEEE_754
454     int32_t lowBits;
455 
456     /* handle error cases*/
457     if(uprv_isNaN(d))
458         return uprv_getNaN();
459     if(uprv_isInfinite(d))
460         return uprv_getInfinity();
461 
462     lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
463     if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
464         return ceil(d);
465     else
466         return floor(d);
467 
468 #else
469     return d >= 0 ? floor(d) : ceil(d);
470 
471 #endif
472 }
473 
474 /**
475  * Return the largest positive number that can be represented by an integer
476  * type of arbitrary bit length.
477  */
478 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)479 uprv_maxMantissa(void)
480 {
481     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
482 }
483 
484 U_CAPI double U_EXPORT2
uprv_log(double d)485 uprv_log(double d)
486 {
487     return log(d);
488 }
489 
490 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)491 uprv_maximumPtr(void * base)
492 {
493 #if defined(OS400)
494     /*
495      * With the provided function we should never be out of range of a given segment
496      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
497      * id and 3 bytes for the offset.  The key is that the casting takes care of
498      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
499      * seen in a program is x001000 and when casted to an int would be 0.
500      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
501      *
502      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
503      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
504      * This function determines the activation based on the pointer that is passed in and
505      * calculates the appropriate maximum available size for
506      * each pointer type (TERASPACE and non-TERASPACE)
507      *
508      * Unlike other operating systems, the pointer model isn't determined at
509      * compile time on i5/OS.
510      */
511     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
512         /* if it is a TERASPACE pointer the max is 2GB - 4k */
513         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
514     }
515     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
516     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
517 
518 #else
519     return U_MAX_PTR(base);
520 #endif
521 }
522 
523 /*---------------------------------------------------------------------------
524   Platform-specific Implementations
525   Try these, and if they don't work on your platform, then special case your
526   platform with new implementations.
527   ---------------------------------------------------------------------------*/
528 
529 /* Generic time zone layer -------------------------------------------------- */
530 
531 /* Time zone utilities */
532 U_CAPI void U_EXPORT2
uprv_tzset()533 uprv_tzset()
534 {
535 #ifdef U_TZSET
536     U_TZSET();
537 #else
538     /* no initialization*/
539 #endif
540 }
541 
542 U_CAPI int32_t U_EXPORT2
uprv_timezone()543 uprv_timezone()
544 {
545 #ifdef U_TIMEZONE
546     return U_TIMEZONE;
547 #else
548     time_t t, t1, t2;
549     struct tm tmrec;
550     UBool dst_checked;
551     int32_t tdiff = 0;
552 
553     time(&t);
554     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
555     dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
556     t1 = mktime(&tmrec);                 /* local time in seconds*/
557     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
558     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
559     tdiff = t2 - t1;
560     /* imitate NT behaviour, which returns same timezone offset to GMT for
561        winter and summer*/
562     if (dst_checked)
563         tdiff += 3600;
564     return tdiff;
565 #endif
566 }
567 
568 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
569    some platforms need to have it declared here. */
570 
571 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
572 /* RS6000 and others reject char **tzname.  */
573 extern U_IMPORT char *U_TZNAME[];
574 #endif
575 
576 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
577 /* These platforms are likely to use Olson timezone IDs. */
578 #define CHECK_LOCALTIME_LINK 1
579 #if defined(U_LINUX)
580 #define TZDEFAULT       "/etc/localtime"
581 #define TZZONEINFO      "/usr/share/zoneinfo/"
582 #else
583 #include <tzfile.h>
584 #define TZZONEINFO      (TZDIR "/")
585 #endif
586 static char gTimeZoneBuffer[PATH_MAX];
587 static char *gTimeZoneBufferPtr = NULL;
588 #endif
589 
590 #ifndef U_WINDOWS
591 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)592 static UBool isValidOlsonID(const char *id) {
593     int32_t idx = 0;
594 
595     /* Determine if this is something like Iceland (Olson ID)
596     or AST4ADT (non-Olson ID) */
597     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
598         idx++;
599     }
600 
601     /* If we went through the whole string, then it might be okay.
602     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
603     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
604     The rest of the time it could be an Olson ID. George */
605     return (UBool)(id[idx] == 0
606         || uprv_strcmp(id, "PST8PDT") == 0
607         || uprv_strcmp(id, "MST7MDT") == 0
608         || uprv_strcmp(id, "CST6CDT") == 0
609         || uprv_strcmp(id, "EST5EDT") == 0);
610 }
611 #endif
612 
613 #if defined(U_TZNAME) && !defined(U_WINDOWS)
614 
615 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/*
 * One row of the abbreviation-to-Olson-ID table: the combination of offset,
 * daylight type and the two abbreviations identifies an Olson ID.
 */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds; /* compared against the value of uprv_timezone() */
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;    /* abbreviation compared against U_TZNAME[0] */
    const char *dstID;    /* abbreviation compared against U_TZNAME[1] */
    const char *olsonID;  /* result returned when all four keys match */
} OffsetZoneMapping;

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
The table is searched linearly and the first row whose four keys all match is
used.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};
694 
695 /*#define DEBUG_TZNAME*/
696 
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)697 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
698 {
699     int32_t idx;
700 #ifdef DEBUG_TZNAME
701     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
702 #endif
703     for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
704     {
705         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
706             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
707             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
708             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
709         {
710             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
711         }
712     }
713     return NULL;
714 }
715 #endif
716 
717 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)718 uprv_tzname(int n)
719 {
720     const char *tzid = NULL;
721 #ifdef U_WINDOWS
722     tzid = uprv_detectWindowsTimeZone();
723 
724     if (tzid != NULL) {
725         return tzid;
726     }
727 #else
728 
729 /*#if defined(U_DARWIN)
730     int ret;
731 
732     tzid = getenv("TZFILE");
733     if (tzid != NULL) {
734         return tzid;
735     }
736 #endif*/
737 
738 /* This code can be temporarily disabled to test tzname resolution later on. */
739 #ifndef DEBUG_TZNAME
740     tzid = getenv("TZ");
741     if (tzid != NULL && isValidOlsonID(tzid))
742     {
743         /* This might be a good Olson ID. */
744         if (uprv_strncmp(tzid, "posix/", 6) == 0
745             || uprv_strncmp(tzid, "right/", 6) == 0)
746         {
747             /* Remove the posix/ or right/ prefix. */
748             tzid += 6;
749         }
750         return tzid;
751     }
752     /* else U_TZNAME will give a better result. */
753 #endif
754 
755 #if defined(CHECK_LOCALTIME_LINK)
756     /* Caller must handle threading issues */
757     if (gTimeZoneBufferPtr == NULL) {
758         /*
759         This is a trick to look at the name of the link to get the Olson ID
760         because the tzfile contents is underspecified.
761         This isn't guaranteed to work because it may not be a symlink.
762         */
763         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
764         if (0 < ret) {
765             int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
766             gTimeZoneBuffer[ret] = 0;
767             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
768                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
769             {
770                 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
771             }
772         }
773     }
774     else {
775         return gTimeZoneBufferPtr;
776     }
777 #endif
778 #endif
779 
780 #ifdef U_TZNAME
781 #if !defined(U_WINDOWS)
782     /*
783     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
784     So we remap the abbreviation to an olson ID.
785 
786     Since Windows exposes a little more timezone information,
787     we normally don't use this code on Windows because
788     uprv_detectWindowsTimeZone should have already given the correct answer.
789     */
790     {
791         struct tm juneSol, decemberSol;
792         int daylightType;
793         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
794         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
795 
796         /* This probing will tell us when daylight savings occurs.  */
797         localtime_r(&juneSolstice, &juneSol);
798         localtime_r(&decemberSolstice, &decemberSol);
799         daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
800         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
801         if (tzid != NULL) {
802             return tzid;
803         }
804     }
805 #endif
806     return U_TZNAME[n];
807 #else
808     return "";
809 #endif
810 }
811 
812 /* Get and set the ICU data directory --------------------------------------- */
813 
814 static char *gDataDirectory = NULL;
815 #if U_POSIX_LOCALE
816  static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
817 #endif
818 
putil_cleanup(void)819 static UBool U_CALLCONV putil_cleanup(void)
820 {
821     if (gDataDirectory && *gDataDirectory) {
822         uprv_free(gDataDirectory);
823     }
824     gDataDirectory = NULL;
825 #if U_POSIX_LOCALE
826     if (gCorrectedPOSIXLocale) {
827         uprv_free(gCorrectedPOSIXLocale);
828         gCorrectedPOSIXLocale = NULL;
829     }
830 #endif
831     return TRUE;
832 }
833 
834 /*
835  * Set the data directory.
836  *    Make a copy of the passed string, and set the global data dir to point to it.
837  *    TODO:  see bug #2849, regarding thread safety.
838  */
839 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)840 u_setDataDirectory(const char *directory) {
841     char *newDataDir;
842     int32_t length;
843 
844     if(directory==NULL || *directory==0) {
845         /* A small optimization to prevent the malloc and copy when the
846         shared library is used, and this is a way to make sure that NULL
847         is never returned.
848         */
849         newDataDir = (char *)"";
850     }
851     else {
852         length=(int32_t)uprv_strlen(directory);
853         newDataDir = (char *)uprv_malloc(length + 2);
854         uprv_strcpy(newDataDir, directory);
855 
856 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
857         {
858             char *p;
859             while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
860                 *p = U_FILE_SEP_CHAR;
861             }
862         }
863 #endif
864     }
865 
866     umtx_lock(NULL);
867     if (gDataDirectory && *gDataDirectory) {
868         uprv_free(gDataDirectory);
869     }
870     gDataDirectory = newDataDir;
871     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
872     umtx_unlock(NULL);
873 }
874 
875 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)876 uprv_pathIsAbsolute(const char *path)
877 {
878   if(!path || !*path) {
879     return FALSE;
880   }
881 
882   if(*path == U_FILE_SEP_CHAR) {
883     return TRUE;
884   }
885 
886 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
887   if(*path == U_FILE_ALT_SEP_CHAR) {
888     return TRUE;
889   }
890 #endif
891 
892 #if defined(U_WINDOWS)
893   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
894        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
895       path[1] == ':' ) {
896     return TRUE;
897   }
898 #endif
899 
900   return FALSE;
901 }
902 
903 U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)904 u_getDataDirectory(void) {
905     const char *path = NULL;
906 
907     /* if we have the directory, then return it immediately */
908     UMTX_CHECK(NULL, gDataDirectory, path);
909 
910     if(path) {
911         return path;
912     }
913 
914     /*
915     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
916     override ICU's data with the ICU_DATA environment variable. This prevents
917     problems where multiple custom copies of ICU's specific version of data
918     are installed on a system. Either the application must define the data
919     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
920     ICU, set the data with udata_setCommonData or trust that all of the
921     required data is contained in ICU's data library that contains
922     the entry point defined by U_ICUDATA_ENTRY_POINT.
923 
924     There may also be some platforms where environment variables
925     are not allowed.
926     */
927 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
928     /* First try to get the environment variable */
929     path=getenv("ICU_DATA");
930 #   endif
931 
932     /* ICU_DATA_DIR may be set as a compile option */
933 #   ifdef ICU_DATA_DIR
934     if(path==NULL || *path==0) {
935         path=ICU_DATA_DIR;
936     }
937 #   endif
938 
939     if(path==NULL) {
940         /* It looks really bad, set it to something. */
941         path = "";
942     }
943 
944     u_setDataDirectory(path);
945     return gDataDirectory;
946 }
947 
948 
949 
950 
951 
952 /* Macintosh-specific locale information ------------------------------------ */
953 #ifdef XP_MAC
954 
/*
 * One row of the classic Mac OS locale lookup table.  A row matches when
 * each of script, region, lang and date_region is either
 * MAC_LC_MAGIC_NUMBER or equal to the value queried for that field.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;    /* locale ID reported for a matching row */
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
/* Parenthesized so the negative literals stay intact inside expressions. */
#define MAC_LC_MAGIC_NUMBER (-5)   /* wildcard: the field matches any value */
#define MAC_LC_INIT_NUMBER (-9)    /* initial value for fields not yet queried */

/*
 * Rows are tried in order and the first match wins, so the all-wildcard
 * fallback must stay last.
 * Region codes without a mapping yet: 22 Malta, 23 Cyprus,
 * 33 Hindi system for India, 34 Pakistan,
 * 46 Lapland [Ask Rich for the data. HS], 47 Faeroe Islands.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  0, "en_US" }, /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  1, "fr_FR" }, /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  2, "en_GB" }, /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  3, "de_DE" }, /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  4, "it_IT" }, /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  5, "nl_NL" }, /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  6, "fr_BE" }, /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  7, "sv_SE" }, /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  9, "da_DK" }, /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel: Hebrew.  This row used to say "is_IS", which is Icelandic and
       belongs to region 21 below. */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" }, /* Israel */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1054 
1055 #endif
1056 
1057 #if U_POSIX_LOCALE
/*
 * Return just the POSIX id, whatever happens to be in it.
 * The value is determined on the first call and cached in a function-local
 * static; later calls return the cached pointer.
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;
    const char *id;

    if (posixID != NULL) {
        return posixID;
    }

    /*
    * On Solaris two different calls to setlocale can give different
    * answers, so the value is fetched only once.
    *
    * setlocale is consulted first because an application can set it.
    *
    * LC_ALL can't be used because its format is platform dependent; the
    * LANG environment variable seems to affect LC_CTYPE by default.
    * What setlocale(LC_ALL, NULL) can return:
    *   HPUX:    'C C C C C C C'
    *   Solaris: /en_US/C/C/C/C/C on the second try
    *   Linux:   LC_CTYPE=C;LC_NUMERIC=C;...
    *
    * The default codepage detection also needs to use LC_CTYPE.
    *
    * Do not call setlocale(LC_*, "")! An empty string instead of NULL
    * modifies the libc behavior.
    */
    id = setlocale(LC_CTYPE, NULL);
    if (id == NULL
        || uprv_strcmp("C", id) == 0
        || uprv_strcmp("POSIX", id) == 0)
    {
        /* Maybe we got some garbage.  Try the environment, in order. */
        id = getenv("LC_ALL");
        if (id == NULL) {
            id = getenv("LC_CTYPE");
        }
        if (id == NULL) {
            id = getenv("LANG");
        }
    }

    if (id == NULL
        || uprv_strcmp("C", id) == 0
        || uprv_strcmp("POSIX", id) == 0)
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        id = "en_US_POSIX";
    }

    posixID = id;
    return posixID;
}
1107 #endif
1108 
1109 /* NOTE: The caller should handle thread safety */
1110 U_CAPI const char* U_EXPORT2
uprv_getDefaultLocaleID()1111 uprv_getDefaultLocaleID()
1112 {
1113 #if U_POSIX_LOCALE
1114 /*
1115   Note that:  (a '!' means the ID is improper somehow)
1116      LC_ALL  ---->     default_loc          codepage
1117 --------------------------------------------------------
1118      ab.CD             ab                   CD
1119      ab@CD             ab__CD               -
1120      ab@CD.EF          ab__CD               EF
1121 
1122      ab_CD.EF@GH       ab_CD_GH             EF
1123 
1124 Some 'improper' ways to do the same as above:
1125   !  ab_CD@GH.EF       ab_CD_GH             EF
1126   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1127   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1128 
1129      _CD@GH            _CD_GH               -
1130      _CD.EF@GH         _CD_GH               EF
1131 
1132 The variant cannot have dots in it.
1133 The 'rightmost' variant (@xxx) wins.
1134 The leftmost codepage (.xxx) wins.
1135 */
1136     char *correctedPOSIXLocale = 0;
1137     const char* posixID = uprv_getPOSIXID();
1138     const char *p;
1139     const char *q;
1140     int32_t len;
1141 
1142     /* Format: (no spaces)
1143     ll [ _CC ] [ . MM ] [ @ VV]
1144 
1145       l = lang, C = ctry, M = charmap, V = variant
1146     */
1147 
1148     if (gCorrectedPOSIXLocale != NULL) {
1149         return gCorrectedPOSIXLocale;
1150     }
1151 
1152     if ((p = uprv_strchr(posixID, '.')) != NULL) {
1153         /* assume new locale can't be larger than old one? */
1154         correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1155         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1156         correctedPOSIXLocale[p-posixID] = 0;
1157 
1158         /* do not copy after the @ */
1159         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1160             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1161         }
1162     }
1163 
1164     /* Note that we scan the *uncorrected* ID. */
1165     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1166         if (correctedPOSIXLocale == NULL) {
1167             correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1168             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1169             correctedPOSIXLocale[p-posixID] = 0;
1170         }
1171         p++;
1172 
1173         /* Take care of any special cases here.. */
1174         if (!uprv_strcmp(p, "nynorsk")) {
1175             p = "NY";
1176             /* Don't worry about no__NY. In practice, it won't appear. */
1177         }
1178 
1179         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1180             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1181         }
1182         else {
1183             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1184         }
1185 
1186         if ((q = uprv_strchr(p, '.')) != NULL) {
1187             /* How big will the resulting string be? */
1188             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1189             uprv_strncat(correctedPOSIXLocale, p, q-p);
1190             correctedPOSIXLocale[len] = 0;
1191         }
1192         else {
1193             /* Anything following the @ sign */
1194             uprv_strcat(correctedPOSIXLocale, p);
1195         }
1196 
1197         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1198          * How about 'russian' -> 'ru'?
1199          * Many of the other locales using ISO codes will be handled by the
1200          * canonicalization functions in uloc_getDefault.
1201          */
1202     }
1203 
1204     /* Was a correction made? */
1205     if (correctedPOSIXLocale != NULL) {
1206         posixID = correctedPOSIXLocale;
1207     }
1208     else {
1209         /* copy it, just in case the original pointer goes away.  See j2395 */
1210         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1211         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1212     }
1213 
1214     if (gCorrectedPOSIXLocale == NULL) {
1215         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1216         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1217         correctedPOSIXLocale = NULL;
1218     }
1219 
1220     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1221         uprv_free(correctedPOSIXLocale);
1222     }
1223 
1224     return posixID;
1225 
1226 #elif defined(U_WINDOWS)
1227     UErrorCode status = U_ZERO_ERROR;
1228     LCID id = GetThreadLocale();
1229     const char* locID = uprv_convertToPosix(id, &status);
1230 
1231     if (U_FAILURE(status)) {
1232         locID = "en_US";
1233     }
1234     return locID;
1235 
1236 #elif defined(XP_MAC)
1237     int32_t script = MAC_LC_INIT_NUMBER;
1238     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1239     int32_t region = MAC_LC_INIT_NUMBER;
1240     /* = GetScriptManagerVariable(smRegionCode);*/
1241     int32_t lang = MAC_LC_INIT_NUMBER;
1242     /* = GetScriptManagerVariable(smScriptLang);*/
1243     int32_t date_region = MAC_LC_INIT_NUMBER;
1244     const char* posixID = 0;
1245     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1246     int32_t i;
1247     Intl1Hndl ih;
1248 
1249     ih = (Intl1Hndl) GetIntlResource(1);
1250     if (ih)
1251         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1252 
1253     for (i = 0; i < count; i++) {
1254         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1255              || (mac_lc_recs[i].script == script))
1256             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1257              || (mac_lc_recs[i].region == region))
1258             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1259              || (mac_lc_recs[i].lang == lang))
1260             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1261              || (mac_lc_recs[i].date_region == date_region))
1262             )
1263         {
1264             posixID = mac_lc_recs[i].posixID;
1265             break;
1266         }
1267     }
1268 
1269     return posixID;
1270 
1271 #elif defined(OS400)
1272     /* locales are process scoped and are by definition thread safe */
1273     static char correctedLocale[64];
1274     const  char *localeID = getenv("LC_ALL");
1275            char *p;
1276 
1277     if (localeID == NULL)
1278         localeID = getenv("LANG");
1279     if (localeID == NULL)
1280         localeID = setlocale(LC_ALL, NULL);
1281     /* Make sure we have something... */
1282     if (localeID == NULL)
1283         return "en_US_POSIX";
1284 
1285     /* Extract the locale name from the path. */
1286     if((p = uprv_strrchr(localeID, '/')) != NULL)
1287     {
1288         /* Increment p to start of locale name. */
1289         p++;
1290         localeID = p;
1291     }
1292 
1293     /* Copy to work location. */
1294     uprv_strcpy(correctedLocale, localeID);
1295 
1296     /* Strip off the '.locale' extension. */
1297     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1298         *p = 0;
1299     }
1300 
1301     /* Upper case the locale name. */
1302     T_CString_toUpperCase(correctedLocale);
1303 
1304     /* See if we are using the POSIX locale.  Any of the
1305     * following are equivalent and use the same QLGPGCMA
1306     * (POSIX) locale.
1307     * QLGPGCMA2 means UCS2
1308     * QLGPGCMA_4 means UTF-32
1309     * QLGPGCMA_8 means UTF-8
1310     */
1311     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1312         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1313         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1314     {
1315         uprv_strcpy(correctedLocale, "en_US_POSIX");
1316     }
1317     else
1318     {
1319         int16_t LocaleLen;
1320 
1321         /* Lower case the lang portion. */
1322         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1323         {
1324             *p = uprv_tolower(*p);
1325         }
1326 
1327         /* Adjust for Euro.  After '_E' add 'URO'. */
1328         LocaleLen = uprv_strlen(correctedLocale);
1329         if (correctedLocale[LocaleLen - 2] == '_' &&
1330             correctedLocale[LocaleLen - 1] == 'E')
1331         {
1332             uprv_strcat(correctedLocale, "URO");
1333         }
1334 
1335         /* If using Lotus-based locale then convert to
1336          * equivalent non Lotus.
1337          */
1338         else if (correctedLocale[LocaleLen - 2] == '_' &&
1339             correctedLocale[LocaleLen - 1] == 'L')
1340         {
1341             correctedLocale[LocaleLen - 2] = 0;
1342         }
1343 
1344         /* There are separate simplified and traditional
1345          * locales called zh_HK_S and zh_HK_T.
1346          */
1347         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1348         {
1349             uprv_strcpy(correctedLocale, "zh_HK");
1350         }
1351 
1352         /* A special zh_CN_GBK locale...
1353         */
1354         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1355         {
1356             uprv_strcpy(correctedLocale, "zh_CN");
1357         }
1358 
1359     }
1360 
1361     return correctedLocale;
1362 #endif
1363 
1364 }
1365 
1366 #if U_POSIX_LOCALE
/*
Platforms sometimes name one charset when they really mean another.  This
function renames such platform-specific codepage names to ones ICU resolves
as intended.  Only conflicting or ambiguous aliases belong here; before
adding a case, consider adding a unique name to the ICU alias table in the
data directory instead.

locale  Locale part of the POSIX ID; NULL and "" are treated alike.
name    Codepage name reported by the platform; may be NULL.
Returns the (possibly replaced) name, or NULL when name is NULL or empty.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (name == NULL) {
        return NULL;
    }
    if (locale != NULL && locale[0] == 0) {
        /* An empty locale is handled exactly like a missing one. */
        locale = NULL;
    }

#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name; the locale decides. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        }
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        }
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
#elif defined(U_HPUX)
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name; the locale decides. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        }
        else if (uprv_strcmp(locale, "japanese") == 0) {
            /* Same choice as for eucjp above. */
            name = "eucjis";
        }
    }
#endif

    /* return NULL when "" is passed in */
    return (name[0] != 0) ? name : NULL;
}
1456 
/*
 * Extract the codepage from a POSIX locale ID of the form
 * "locale.codepage[@variant]".
 *
 * The text after the first '.' is copied into buffer (truncated to
 * buffCapacity-1 characters), cut at the first '@', and then passed through
 * remapPlatformDependentCodepage() together with the text before the '.'.
 *
 * localeName    POSIX locale ID; may be NULL.
 * buffer        Receives the codepage name.
 * buffCapacity  Size of buffer in bytes.
 * Returns the remapped codepage name, or NULL when localeName is NULL or
 * contains no '.'.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localePart[100];
    size_t localePartSize;
    const char *dot;
    char *at;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        return NULL;
    }

    /* Locale portion: everything before the '.', bounded by the local buffer. */
    localePartSize = uprv_min(sizeof(localePart), (dot-localeName)+1);
    uprv_strncpy(localePart, localeName, localePartSize);
    localePart[localePartSize-1] = 0; /* ensure NULL termination */

    /* Codepage portion: everything after the '.'. */
    uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */

    /* A trailing "@variant" is not part of the codepage. */
    at = uprv_strchr(buffer, '@');
    if (at != NULL) {
        *at = 0;
    }

    return remapPlatformDependentCodepage(localePart, buffer);
}
1477 #endif
1478 
/*
 * Determine the name of the platform's default codepage.
 *
 * OS/400:  "ibm-<ccsid>" from the job's CCSID, defaulting to ibm-37.
 * OS/390:  nl_langinfo(CODESET) followed by UCNV_SWAP_LFNL_OPTION_STRING.
 * XP_MAC:  "macintosh".
 * Windows: "windows-<ACP>".
 * POSIX:   the codepage embedded in the POSIX locale ID if there is one,
 *          else nl_langinfo() where available, else "US-ASCII".
 * Other:   "US-ASCII".
 *
 * Apart from the string literals, the returned pointer refers to a static
 * buffer inside this function.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned, so format it as such. */
    sprintf(codepage,"ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* Longest codeset name that still leaves room for the option suffix
       and the terminating NUL (sizeof of the literal counts its NUL). */
    const size_t maxCodesetLen = sizeof(codepage) - sizeof(UCNV_SWAP_LFNL_OPTION_STRING);

    /* "%63s" must not be used here: 63 would be a minimum field width, which
       pads the name to 63 characters and lets the suffix overflow. */
    uprv_strncpy(codepage, nl_langinfo(CODESET), maxCodesetLen);
    codepage[maxCodesetLen] = 0; /* NULL terminate */
    uprv_strcat(codepage, UCNV_SWAP_LFNL_OPTION_STRING);
    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* GetACP() yields an unsigned value, so format it as such. */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    localeName = uprv_getPOSIXID();
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        codeset = remapPlatformDependentCodepage(NULL, codeset);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1563 
1564 
1565 U_CAPI const char*  U_EXPORT2
uprv_getDefaultCodepage()1566 uprv_getDefaultCodepage()
1567 {
1568     static char const  *name = NULL;
1569     umtx_lock(NULL);
1570     if (name == NULL) {
1571         name = int_getDefaultCodepage();
1572     }
1573     umtx_unlock(NULL);
1574     return name;
1575 }
1576 
1577 
1578 /* end of platform-specific implementation -------------- */
1579 
1580 /* version handling --------------------------------------------------------- */
1581 
1582 U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray,const char * versionString)1583 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1584     char *end;
1585     uint16_t part=0;
1586 
1587     if(versionArray==NULL) {
1588         return;
1589     }
1590 
1591     if(versionString!=NULL) {
1592         for(;;) {
1593             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1594             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1595                 break;
1596             }
1597             versionString=end+1;
1598         }
1599     }
1600 
1601     while(part<U_MAX_VERSION_LENGTH) {
1602         versionArray[part++]=0;
1603     }
1604 }
1605 
1606 U_CAPI void U_EXPORT2
u_versionToString(UVersionInfo versionArray,char * versionString)1607 u_versionToString(UVersionInfo versionArray, char *versionString) {
1608     uint16_t count, part;
1609     uint8_t field;
1610 
1611     if(versionString==NULL) {
1612         return;
1613     }
1614 
1615     if(versionArray==NULL) {
1616         versionString[0]=0;
1617         return;
1618     }
1619 
1620     /* count how many fields need to be written */
1621     for(count=4; count>0 && versionArray[count-1]==0; --count) {
1622     }
1623 
1624     if(count <= 1) {
1625         count = 2;
1626     }
1627 
1628     /* write the first part */
1629     /* write the decimal field value */
1630     field=versionArray[0];
1631     if(field>=100) {
1632         *versionString++=(char)('0'+field/100);
1633         field%=100;
1634     }
1635     if(field>=10) {
1636         *versionString++=(char)('0'+field/10);
1637         field%=10;
1638     }
1639     *versionString++=(char)('0'+field);
1640 
1641     /* write the following parts */
1642     for(part=1; part<count; ++part) {
1643         /* write a dot first */
1644         *versionString++=U_VERSION_DELIMITER;
1645 
1646         /* write the decimal field value */
1647         field=versionArray[part];
1648         if(field>=100) {
1649             *versionString++=(char)('0'+field/100);
1650             field%=100;
1651         }
1652         if(field>=10) {
1653             *versionString++=(char)('0'+field/10);
1654             field%=10;
1655         }
1656         *versionString++=(char)('0'+field);
1657     }
1658 
1659     /* NUL-terminate */
1660     *versionString=0;
1661 }
1662 
1663 U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray)1664 u_getVersion(UVersionInfo versionArray) {
1665     u_versionFromString(versionArray, U_ICU_VERSION);
1666 }
1667 
1668 /*
1669  * Hey, Emacs, please set the following:
1670  *
1671  * Local Variables:
1672  * indent-tabs-mode: nil
1673  * End:
1674  *
1675  */
1676