• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1997-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 *   Date        Name        Description
12 *   04/14/97    aliu        Creation.
13 *   04/24/97    aliu        Added getDefaultDataDirectory() and
14 *                            getDefaultLocaleID().
15 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
16 *                            for assumed case.  Non-UNIX platforms must be
17 *                            special-cased.  Rewrote numeric methods dealing
18 *                            with NaN and Infinity to be platform independent
19 *                             over all IEEE 754 platforms.
20 *   05/13/97    aliu        Restored sign of timezone
21 *                            (semantics are hours West of GMT)
22 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 *                             nextDouble..
24 *   07/22/98    stephen     Added remainder, max, min, trunc
25 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
26 *   08/24/98    stephen     Added longBitsFromDouble
27 *   09/08/98    stephen     Minor changes for Mac Port
28 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
29 *                            Fixed EBCDIC tables
30 *   04/15/99    stephen     Converted to C.
31 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
32 *   08/04/99    jeffrey R.  Added OS/2 changes
33 *   11/15/99    helena      Integrated S/390 IEEE support.
34 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
35 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
36 *   01/03/08    Steven L.   Fake Time Support
37 ******************************************************************************
38 */
39 
40 /* Define _XOPEN_SOURCE for Solaris and friends. */
41 /* NetBSD needs it to be >= 4 */
42 #if !defined(_XOPEN_SOURCE)
43 #if __STDC_VERSION__ >= 199901L
44 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
45 #define _XOPEN_SOURCE 600
46 #else
47 #define _XOPEN_SOURCE 4
48 #endif
49 #endif
50 
51 /* Make sure things like readlink and such functions work.
52 Poorly upgraded Solaris machines can't have this defined.
53 Cleanly installed Solaris can use this #define.
54 */
55 #if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
56 #define _XOPEN_SOURCE_EXTENDED 1
57 #endif
58 
59 /* include ICU headers */
60 #include "unicode/utypes.h"
61 #include "unicode/putil.h"
62 #include "unicode/ustring.h"
63 #include "putilimp.h"
64 #include "uassert.h"
65 #include "umutex.h"
66 #include "cmemory.h"
67 #include "cstring.h"
68 #include "locmap.h"
69 #include "ucln_cmn.h"
70 
71 /* Include standard headers. */
72 #include <stdio.h>
73 #include <stdlib.h>
74 #include <string.h>
75 #include <math.h>
76 #include <locale.h>
77 #include <float.h>
78 
79 /* include system headers */
80 #if defined(U_WINDOWS) || defined(U_MINGW)
81 #   define WIN32_LEAN_AND_MEAN
82 #   define VC_EXTRALEAN
83 #   define NOUSER
84 #   define NOSERVICE
85 #   define NOIME
86 #   define NOMCX
87 #   include <windows.h>
88 #   include "wintz.h"
89 #elif defined(OS400)
90 #   include <float.h>
91 #   include <qusec.h>       /* error code structure */
92 #   include <qusrjobi.h>
93 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
94 #   include <mih/testptr.h> /* For uprv_maximumPtr */
95 #elif defined(XP_MAC)
96 #   include <Files.h>
97 #   include <IntlResources.h>
98 #   include <Script.h>
99 #   include <Folders.h>
100 #   include <MacTypes.h>
101 #   include <TextUtils.h>
102 #   define ICU_NO_USER_DATA_OVERRIDE 1
103 #elif defined(OS390)
104 #   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
105 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
106 #   include <limits.h>
107 #   include <unistd.h>
108 #elif defined(U_QNX)
109 #   include <sys/neutrino.h>
110 #elif defined(U_SOLARIS)
111 #   ifndef _XPG4_2
112 #       define _XPG4_2
113 #   endif
114 #endif
115 
116 #if (defined(U_CYGWIN) || defined(U_MINGW)) && defined(__STRICT_ANSI__)
117 /* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
118 #undef __STRICT_ANSI__
119 #endif
120 
121 /*
 * Cygwin with GCC requires time.h to be included after strict ANSI mode has been disabled above.
123  */
124 #include <time.h>
125 
126 #if defined(U_DARWIN)
127 #include <TargetConditionals.h>
128 #endif
129 
130 #ifndef U_WINDOWS
131 #include <sys/time.h>
132 #endif
133 
134 /*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
137  *
138  */
139 
140 #if U_HAVE_NL_LANGINFO_CODESET
141 #include <langinfo.h>
142 #endif
143 
144 /**
145  * Simple things (presence of functions, etc) should just go in configure.in and be added to
146  * icucfg.h via autoheader.
147  */
148 #if defined(U_HAVE_ICUCFG)
149 #include "icucfg.h"
150 #endif
151 
152 /* Define the extension for data files, again... */
153 #define DATA_TYPE "dat"
154 
155 /* Leave this copyright notice here! */
156 static const char copyright[] = U_COPYRIGHT_STRING;
157 
158 /* floating point implementations ------------------------------------------- */
159 
160 /* We return QNAN rather than SNAN*/
161 #define SIGN 0x80000000U
162 
/*
 * Overlay of a 64-bit integer and a double, used to spell floating point
 * constants as integer bit patterns.
 * i64 has to remain the first member: a C89 brace initializer initializes
 * the first member of a union.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern returned by uprv_getNaN(). */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };
/* Bit pattern returned by uprv_getInfinity(). */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
170 
171 /*---------------------------------------------------------------------------
172   Platform utilities
173   Our general strategy is to assume we're on a POSIX platform.  Platforms which
174   are non-POSIX must declare themselves so.  The default POSIX implementation
175   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
176   functions).
177   ---------------------------------------------------------------------------*/
178 
179 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400) || defined(U_MINGW)
180 #   undef U_POSIX_LOCALE
181 #else
182 #   define U_POSIX_LOCALE    1
183 #endif
184 
185 /*
186     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
187     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
188 */
189 #if !IEEE_754
/*
 * Returns a pointer to n bytes at one end of the double *d: the start of the
 * object when U_IS_BIG_ENDIAN is set, otherwise its last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
199 
/*
 * Counterpart of u_topNBytesOfDouble: returns a pointer to n bytes at the
 * opposite end of the double *d. That is its last n bytes when
 * U_IS_BIG_ENDIAN is set, otherwise the start of the object.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
209 #endif   /* !IEEE_754 */
210 
211 #if IEEE_754
212 static UBool
u_signBit(double d)213 u_signBit(double d) {
214     uint8_t hiByte;
215 #if U_IS_BIG_ENDIAN
216     hiByte = *(uint8_t *)&d;
217 #else
218     hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
219 #endif
220     return (hiByte & 0x80) != 0;
221 }
222 #endif
223 
224 
225 
226 #if defined (U_DEBUG_FAKETIME)
227 /* Override the clock to test things without having to move the system clock.
228  * Assumes POSIX gettimeofday() will function
229  */
230 UDate fakeClock_t0 = 0; /** Time to start the clock from **/
231 UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
232 UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
233 static UMTX fakeClockMutex = NULL;
234 
getUTCtime_real()235 static UDate getUTCtime_real() {
236     struct timeval posixTime;
237     gettimeofday(&posixTime, NULL);
238     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
239 }
240 
getUTCtime_fake()241 static UDate getUTCtime_fake() {
242     umtx_lock(&fakeClockMutex);
243     if(!fakeClock_set) {
244         UDate real = getUTCtime_real();
245         const char *fake_start = getenv("U_FAKETIME_START");
246         if((fake_start!=NULL) && (fake_start[0]!=0)) {
247             sscanf(fake_start,"%lf",&fakeClock_t0);
248             fakeClock_dt = fakeClock_t0 - real;
249             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
250                     "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
251                     fakeClock_t0, fake_start, fakeClock_dt, real);
252         } else {
253           fakeClock_dt = 0;
254             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
255                     "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
256         }
257         fakeClock_set = TRUE;
258     }
259     umtx_unlock(&fakeClockMutex);
260 
261     return getUTCtime_real() + fakeClock_dt;
262 }
263 #endif
264 
265 #if defined(U_WINDOWS)
266 typedef union {
267     int64_t int64;
268     FILETIME fileTime;
269 } FileTimeConversion;   /* This is like a ULARGE_INTEGER */
270 
271 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
272 #define EPOCH_BIAS  INT64_C(116444736000000000)
273 #define HECTONANOSECOND_PER_MILLISECOND   10000
274 
275 #endif
276 
277 /*---------------------------------------------------------------------------
278   Universal Implementations
279   These are designed to work on all platforms.  Try these, and if they
280   don't work on your platform, then special case your platform with new
281   implementations.
282 ---------------------------------------------------------------------------*/
283 
284 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()285 uprv_getUTCtime()
286 {
287 #if defined(U_DEBUG_FAKETIME)
288     return getUTCtime_fake(); /* Hook for overriding the clock */
289 #else
290     return uprv_getRawUTCtime();
291 #endif
292 }
293 
294 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
295 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()296 uprv_getRawUTCtime()
297 {
298 #if defined(XP_MAC)
299     time_t t, t1, t2;
300     struct tm tmrec;
301 
302     uprv_memset( &tmrec, 0, sizeof(tmrec) );
303     tmrec.tm_year = 70;
304     tmrec.tm_mon = 0;
305     tmrec.tm_mday = 1;
306     t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
307 
308     time(&t);
309     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
310     t2 = mktime(&tmrec);    /* seconds of current GMT*/
311     return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
312 #elif defined(U_WINDOWS)
313 
314     FileTimeConversion winTime;
315     GetSystemTimeAsFileTime(&winTime.fileTime);
316     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
317 #else
318 
319 #if defined(HAVE_GETTIMEOFDAY)
320     struct timeval posixTime;
321     gettimeofday(&posixTime, NULL);
322     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
323 #else
324     time_t epochtime;
325     time(&epochtime);
326     return (UDate)epochtime * U_MILLIS_PER_SECOND;
327 #endif
328 
329 #endif
330 }
331 
332 /*-----------------------------------------------------------------------------
333   IEEE 754
334   These methods detect and return NaN and infinity values for doubles
335   conforming to IEEE 754.  Platforms which support this standard include X86,
336   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
337   If this doesn't work on your platform, you have non-IEEE floating-point, and
338   will need to code your own versions.  A naive implementation is to return 0.0
339   for getNaN and getInfinity, and false for isNaN and isInfinite.
340   ---------------------------------------------------------------------------*/
341 
342 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)343 uprv_isNaN(double number)
344 {
345 #if IEEE_754
346     BitPatternConversion convertedNumber;
347     convertedNumber.d64 = number;
348     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
349     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
350 
351 #elif defined(OS390)
352     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
353                         sizeof(uint32_t));
354     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
355                         sizeof(uint32_t));
356 
357     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
358       (lowBits == 0x00000000L);
359 
360 #else
361     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
362     /* you'll need to replace this default implementation with what's correct*/
363     /* for your platform.*/
364     return number != number;
365 #endif
366 }
367 
368 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)369 uprv_isInfinite(double number)
370 {
371 #if IEEE_754
372     BitPatternConversion convertedNumber;
373     convertedNumber.d64 = number;
374     /* Infinity is exactly 0x7FF0000000000000U. */
375     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
376 #elif defined(OS390)
377     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
378                         sizeof(uint32_t));
379     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
380                         sizeof(uint32_t));
381 
382     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
383 
384 #else
385     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
386     /* value, you'll need to replace this default implementation with what's*/
387     /* correct for your platform.*/
388     return number == (2.0 * number);
389 #endif
390 }
391 
392 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)393 uprv_isPositiveInfinity(double number)
394 {
395 #if IEEE_754 || defined(OS390)
396     return (UBool)(number > 0 && uprv_isInfinite(number));
397 #else
398     return uprv_isInfinite(number);
399 #endif
400 }
401 
402 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)403 uprv_isNegativeInfinity(double number)
404 {
405 #if IEEE_754 || defined(OS390)
406     return (UBool)(number < 0 && uprv_isInfinite(number));
407 
408 #else
409     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
410                         sizeof(uint32_t));
411     return((highBits & SIGN) && uprv_isInfinite(number));
412 
413 #endif
414 }
415 
416 U_CAPI double U_EXPORT2
uprv_getNaN()417 uprv_getNaN()
418 {
419 #if IEEE_754 || defined(OS390)
420     return gNan.d64;
421 #else
422     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
423     /* you'll need to replace this default implementation with what's correct*/
424     /* for your platform.*/
425     return 0.0;
426 #endif
427 }
428 
429 U_CAPI double U_EXPORT2
uprv_getInfinity()430 uprv_getInfinity()
431 {
432 #if IEEE_754 || defined(OS390)
433     return gInf.d64;
434 #else
435     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
436     /* value, you'll need to replace this default implementation with what's*/
437     /* correct for your platform.*/
438     return 0.0;
439 #endif
440 }
441 
442 U_CAPI double U_EXPORT2
uprv_floor(double x)443 uprv_floor(double x)
444 {
445     return floor(x);
446 }
447 
448 U_CAPI double U_EXPORT2
uprv_ceil(double x)449 uprv_ceil(double x)
450 {
451     return ceil(x);
452 }
453 
454 U_CAPI double U_EXPORT2
uprv_round(double x)455 uprv_round(double x)
456 {
457     return uprv_floor(x + 0.5);
458 }
459 
460 U_CAPI double U_EXPORT2
uprv_fabs(double x)461 uprv_fabs(double x)
462 {
463     return fabs(x);
464 }
465 
466 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)467 uprv_modf(double x, double* y)
468 {
469     return modf(x, y);
470 }
471 
472 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)473 uprv_fmod(double x, double y)
474 {
475     return fmod(x, y);
476 }
477 
478 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)479 uprv_pow(double x, double y)
480 {
481     /* This is declared as "double pow(double x, double y)" */
482     return pow(x, y);
483 }
484 
485 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)486 uprv_pow10(int32_t x)
487 {
488     return pow(10.0, (double)x);
489 }
490 
491 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)492 uprv_fmax(double x, double y)
493 {
494 #if IEEE_754
495     /* first handle NaN*/
496     if(uprv_isNaN(x) || uprv_isNaN(y))
497         return uprv_getNaN();
498 
499     /* check for -0 and 0*/
500     if(x == 0.0 && y == 0.0 && u_signBit(x))
501         return y;
502 
503 #endif
504 
505     /* this should work for all flt point w/o NaN and Inf special cases */
506     return (x > y ? x : y);
507 }
508 
509 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)510 uprv_fmin(double x, double y)
511 {
512 #if IEEE_754
513     /* first handle NaN*/
514     if(uprv_isNaN(x) || uprv_isNaN(y))
515         return uprv_getNaN();
516 
517     /* check for -0 and 0*/
518     if(x == 0.0 && y == 0.0 && u_signBit(y))
519         return y;
520 
521 #endif
522 
523     /* this should work for all flt point w/o NaN and Inf special cases */
524     return (x > y ? y : x);
525 }
526 
527 /**
528  * Truncates the given double.
529  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
530  * This is different than calling floor() or ceil():
531  * floor(3.3) = 3, floor(-3.3) = -4
532  * ceil(3.3) = 4, ceil(-3.3) = -3
533  */
534 U_CAPI double U_EXPORT2
uprv_trunc(double d)535 uprv_trunc(double d)
536 {
537 #if IEEE_754
538     /* handle error cases*/
539     if(uprv_isNaN(d))
540         return uprv_getNaN();
541     if(uprv_isInfinite(d))
542         return uprv_getInfinity();
543 
544     if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
545         return ceil(d);
546     else
547         return floor(d);
548 
549 #else
550     return d >= 0 ? floor(d) : ceil(d);
551 
552 #endif
553 }
554 
555 /**
556  * Return the largest positive number that can be represented by an integer
557  * type of arbitrary bit length.
558  */
559 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)560 uprv_maxMantissa(void)
561 {
562     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
563 }
564 
565 U_CAPI double U_EXPORT2
uprv_log(double d)566 uprv_log(double d)
567 {
568     return log(d);
569 }
570 
571 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)572 uprv_maximumPtr(void * base)
573 {
574 #if defined(OS400)
575     /*
576      * With the provided function we should never be out of range of a given segment
577      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
578      * id and 3 bytes for the offset.  The key is that the casting takes care of
579      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
580      * seen in a program is x001000 and when casted to an int would be 0.
581      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
582      *
583      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
584      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
585      * This function determines the activation based on the pointer that is passed in and
586      * calculates the appropriate maximum available size for
587      * each pointer type (TERASPACE and non-TERASPACE)
588      *
589      * Unlike other operating systems, the pointer model isn't determined at
590      * compile time on i5/OS.
591      */
592     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
593         /* if it is a TERASPACE pointer the max is 2GB - 4k */
594         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
595     }
596     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
597     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
598 
599 #else
600     return U_MAX_PTR(base);
601 #endif
602 }
603 
604 /*---------------------------------------------------------------------------
605   Platform-specific Implementations
606   Try these, and if they don't work on your platform, then special case your
607   platform with new implementations.
608   ---------------------------------------------------------------------------*/
609 
610 /* Generic time zone layer -------------------------------------------------- */
611 
612 /* Time zone utilities */
613 U_CAPI void U_EXPORT2
uprv_tzset()614 uprv_tzset()
615 {
616 #if defined(U_TZSET)
617     U_TZSET();
618 #else
619     /* no initialization*/
620 #endif
621 }
622 
623 U_CAPI int32_t U_EXPORT2
uprv_timezone()624 uprv_timezone()
625 {
626 #ifdef U_TIMEZONE
627     return U_TIMEZONE;
628 #else
629     time_t t, t1, t2;
630     struct tm tmrec;
631     UBool dst_checked;
632     int32_t tdiff = 0;
633 
634     time(&t);
635     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
636     dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
637     t1 = mktime(&tmrec);                 /* local time in seconds*/
638     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
639     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
640     tdiff = t2 - t1;
641     /* imitate NT behaviour, which returns same timezone offset to GMT for
642        winter and summer*/
643     if (dst_checked)
644         tdiff += 3600;
645     return tdiff;
646 #endif
647 }
648 
649 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
650    some platforms need to have it declared here. */
651 
652 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
653 /* RS6000 and others reject char **tzname.  */
654 extern U_IMPORT char *U_TZNAME[];
655 #endif
656 
657 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
658 /* These platforms are likely to use Olson timezone IDs. */
659 #define CHECK_LOCALTIME_LINK 1
660 #if defined(U_DARWIN)
661 #include <tzfile.h>
662 #define TZZONEINFO      (TZDIR "/")
663 #else
664 #define TZDEFAULT       "/etc/localtime"
665 #define TZZONEINFO      "/usr/share/zoneinfo/"
666 #endif
667 #if U_HAVE_DIRENT_H
668 #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
669 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
670    symlinked to /etc/localtime, which makes searchForTZFile return
671    'localtime' when it's the first match. */
672 #define TZFILE_SKIP2    "localtime"
673 #define SEARCH_TZFILE
674 #include <dirent.h>  /* Needed to search through system timezone files */
675 #endif
676 static char gTimeZoneBuffer[PATH_MAX];
677 static char *gTimeZoneBufferPtr = NULL;
678 #endif
679 
680 #ifndef U_WINDOWS
681 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)682 static UBool isValidOlsonID(const char *id) {
683     int32_t idx = 0;
684 
685     /* Determine if this is something like Iceland (Olson ID)
686     or AST4ADT (non-Olson ID) */
687     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
688         idx++;
689     }
690 
691     /* If we went through the whole string, then it might be okay.
692     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
693     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
694     The rest of the time it could be an Olson ID. George */
695     return (UBool)(id[idx] == 0
696         || uprv_strcmp(id, "PST8PDT") == 0
697         || uprv_strcmp(id, "MST7MDT") == 0
698         || uprv_strcmp(id, "CST6CDT") == 0
699         || uprv_strcmp(id, "EST5EDT") == 0);
700 }
701 
702 /* On some Unix-like OS, 'posix' subdirectory in
703    /usr/share/zoneinfo replicates the top-level contents. 'right'
704    subdirectory has the same set of files, but individual files
705    are different from those in the top-level directory or 'posix'
706    because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
707    has files for UTC.
708    When the first match for /etc/localtime is in either of them
709    (usually in posix because 'right' has different file contents),
710    or TZ environment variable points to one of them, createTimeZone
711    fails because, say, 'posix/America/New_York' is not an Olson
712    timezone id ('America/New_York' is). So, we have to skip
713    'posix/' and 'right/' at the beginning. */
/* Advances *id past a leading "posix/" or "right/"; otherwise leaves it untouched. */
static void skipZoneIDPrefix(const char** id) {
    const char *zone = *id;

    if (uprv_strncmp(zone, "posix/", 6) != 0
        && uprv_strncmp(zone, "right/", 6) != 0)
    {
        return;
    }
    *id = zone + 6;
}
721 #endif
722 
723 #if defined(U_TZNAME) && !defined(U_WINDOWS)
724 
725 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the abbreviation-to-Olson-ID lookup table. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};

/*#define DEBUG_TZNAME*/

/*
 * Looks up the first row of OFFSET_ZONE_MAPPINGS whose offset, daylight type,
 * standard abbreviation and daylight abbreviation all match the arguments.
 *
 * Returns that row's Olson ID (a string literal owned by the table), or NULL
 * when no row matches.
 */
static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
{
    /* Parenthesize the division so the cast applies to the element count,
       keeping the loop comparison signed. */
    const int32_t mappingCount = (int32_t)(sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]));
    int32_t idx;
#ifdef DEBUG_TZNAME
    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
#endif
    for (idx = 0; idx < mappingCount; idx++)
    {
        const OffsetZoneMapping *row = &OFFSET_ZONE_MAPPINGS[idx];
        if (offset == row->offsetSeconds
            && daylightType == row->daylightType
            && strcmp(row->stdID, stdID) == 0
            && strcmp(row->dstID, dstID) == 0)
        {
            return row->olsonID;
        }
    }
    return NULL;
}
825 #endif
826 
827 #ifdef SEARCH_TZFILE
828 #define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
829 #define MAX_READ_SIZE 512
830 
831 typedef struct DefaultTZInfo {
832     char* defaultTZBuffer;
833     int64_t defaultTZFileSize;
834     FILE* defaultTZFilePtr;
835     UBool defaultTZstatus;
836     int32_t defaultTZPosition;
837 } DefaultTZInfo;
838 
839 /*
840  * This method compares the two files given to see if they are a match.
841  * It is currently use to compare two TZ files.
842  */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)843 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
844     FILE* file;
845     int64_t sizeFile;
846     int64_t sizeFileLeft;
847     int32_t sizeFileRead;
848     int32_t sizeFileToRead;
849     char bufferFile[MAX_READ_SIZE];
850     UBool result = TRUE;
851 
852     if (tzInfo->defaultTZFilePtr == NULL) {
853         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
854     }
855     file = fopen(TZFileName, "r");
856 
857     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
858 
859     if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
860         /* First check that the file size are equal. */
861         if (tzInfo->defaultTZFileSize == 0) {
862             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
863             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
864         }
865         fseek(file, 0, SEEK_END);
866         sizeFile = ftell(file);
867         sizeFileLeft = sizeFile;
868 
869         if (sizeFile != tzInfo->defaultTZFileSize) {
870             result = FALSE;
871         } else {
872             /* Store the data from the files in seperate buffers and
873              * compare each byte to determine equality.
874              */
875             if (tzInfo->defaultTZBuffer == NULL) {
876                 rewind(tzInfo->defaultTZFilePtr);
877                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
878                 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
879             }
880             rewind(file);
881             while(sizeFileLeft > 0) {
882                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
883                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
884 
885                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
886                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
887                     result = FALSE;
888                     break;
889                 }
890                 sizeFileLeft -= sizeFileRead;
891                 tzInfo->defaultTZPosition += sizeFileRead;
892             }
893         }
894     } else {
895         result = FALSE;
896     }
897 
898     if (file != NULL) {
899         fclose(file);
900     }
901 
902     return result;
903 }
904 /*
905  * This method recursively traverses the directory given for a matching TZ file and returns the first match.
906  */
907 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
908 #define SKIP1 "."
909 #define SKIP2 ".."
910 static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)911 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
912     char curpath[MAX_PATH_SIZE];
913     DIR* dirp = opendir(path);
914     DIR* subDirp = NULL;
915     struct dirent* dirEntry = NULL;
916 
917     char* result = NULL;
918     if (dirp == NULL) {
919         return result;
920     }
921 
922     /* Save the current path */
923     uprv_memset(curpath, 0, MAX_PATH_SIZE);
924     uprv_strcpy(curpath, path);
925 
926     /* Check each entry in the directory. */
927     while((dirEntry = readdir(dirp)) != NULL) {
928         const char* dirName = dirEntry->d_name;
929         if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
930             /* Create a newpath with the new entry to test each entry in the directory. */
931             char newpath[MAX_PATH_SIZE];
932             uprv_strcpy(newpath, curpath);
933             uprv_strcat(newpath, dirName);
934 
935             if ((subDirp = opendir(newpath)) != NULL) {
936                 /* If this new path is a directory, make a recursive call with the newpath. */
937                 closedir(subDirp);
938                 uprv_strcat(newpath, "/");
939                 result = searchForTZFile(newpath, tzInfo);
940                 /*
941                  Have to get out here. Otherwise, we'd keep looking
942                  and return the first match in the top-level directory
943                  if there's a match in the top-level. If not, this function
944                  would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
945                  It worked without this in most cases because we have a fallback of calling
946                  localtime_r to figure out the default timezone.
947                 */
948                 if (result != NULL)
949                     break;
950             } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
951                 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
952                     const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
953                     skipZoneIDPrefix(&zoneid);
954                     uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
955                     result = SEARCH_TZFILE_RESULT;
956                     /* Get out after the first one found. */
957                     break;
958                 }
959             }
960         }
961     }
962     closedir(dirp);
963     return result;
964 }
965 #endif
966 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)967 uprv_tzname(int n)
968 {
969     const char *tzid = NULL;
970 #ifdef U_WINDOWS
971     tzid = uprv_detectWindowsTimeZone();
972 
973     if (tzid != NULL) {
974         return tzid;
975     }
976 #else
977 
978 /*#if defined(U_DARWIN)
979     int ret;
980 
981     tzid = getenv("TZFILE");
982     if (tzid != NULL) {
983         return tzid;
984     }
985 #endif*/
986 
987 /* This code can be temporarily disabled to test tzname resolution later on. */
988 #ifndef DEBUG_TZNAME
989     tzid = getenv("TZ");
990     if (tzid != NULL && isValidOlsonID(tzid))
991     {
992         /* This might be a good Olson ID. */
993         skipZoneIDPrefix(&tzid);
994         return tzid;
995     }
996     /* else U_TZNAME will give a better result. */
997 #endif
998 
999 #if defined(CHECK_LOCALTIME_LINK)
1000     /* Caller must handle threading issues */
1001     if (gTimeZoneBufferPtr == NULL) {
1002         /*
1003         This is a trick to look at the name of the link to get the Olson ID
1004         because the tzfile contents is underspecified.
1005         This isn't guaranteed to work because it may not be a symlink.
1006         */
1007         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1008         if (0 < ret) {
1009             int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1010             gTimeZoneBuffer[ret] = 0;
1011             if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1012                 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1013             {
1014                 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1015             }
1016         } else {
1017 #if defined(SEARCH_TZFILE)
1018             DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1019             if (tzInfo != NULL) {
1020                 tzInfo->defaultTZBuffer = NULL;
1021                 tzInfo->defaultTZFileSize = 0;
1022                 tzInfo->defaultTZFilePtr = NULL;
1023                 tzInfo->defaultTZstatus = FALSE;
1024                 tzInfo->defaultTZPosition = 0;
1025 
1026                 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1027 
1028                 /* Free previously allocated memory */
1029                 if (tzInfo->defaultTZBuffer != NULL) {
1030                     uprv_free(tzInfo->defaultTZBuffer);
1031                 }
1032                 if (tzInfo->defaultTZFilePtr != NULL) {
1033                     fclose(tzInfo->defaultTZFilePtr);
1034                 }
1035                 uprv_free(tzInfo);
1036             }
1037 
1038             if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1039                 return gTimeZoneBufferPtr;
1040             }
1041 #endif
1042         }
1043     }
1044     else {
1045         return gTimeZoneBufferPtr;
1046     }
1047 #endif
1048 #endif
1049 
1050 #ifdef U_TZNAME
1051 #if defined(U_WINDOWS) || defined(U_MINGW)
1052     /* The return value is free'd in timezone.cpp on Windows because
1053      * the other code path returns a pointer to a heap location. */
1054     return uprv_strdup(U_TZNAME[n]);
1055 #else
1056     /*
1057     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1058     So we remap the abbreviation to an olson ID.
1059 
1060     Since Windows exposes a little more timezone information,
1061     we normally don't use this code on Windows because
1062     uprv_detectWindowsTimeZone should have already given the correct answer.
1063     */
1064     {
1065         struct tm juneSol, decemberSol;
1066         int daylightType;
1067         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1068         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1069 
1070         /* This probing will tell us when daylight savings occurs.  */
1071         localtime_r(&juneSolstice, &juneSol);
1072         localtime_r(&decemberSolstice, &decemberSol);
1073         daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1074         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1075         if (tzid != NULL) {
1076             return tzid;
1077         }
1078     }
1079     return U_TZNAME[n];
1080 #endif
1081 #else
1082     return "";
1083 #endif
1084 }
1085 
1086 /* Get and set the ICU data directory --------------------------------------- */
1087 
1088 static char *gDataDirectory = NULL;
1089 #if U_POSIX_LOCALE
1090  static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1091 #endif
1092 
putil_cleanup(void)1093 static UBool U_CALLCONV putil_cleanup(void)
1094 {
1095     if (gDataDirectory && *gDataDirectory) {
1096         uprv_free(gDataDirectory);
1097     }
1098     gDataDirectory = NULL;
1099 #if U_POSIX_LOCALE
1100     if (gCorrectedPOSIXLocale) {
1101         uprv_free(gCorrectedPOSIXLocale);
1102         gCorrectedPOSIXLocale = NULL;
1103     }
1104 #endif
1105     return TRUE;
1106 }
1107 
1108 /*
1109  * Set the data directory.
1110  *    Make a copy of the passed string, and set the global data dir to point to it.
1111  *    TODO:  see bug #2849, regarding thread safety.
1112  */
1113 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1114 u_setDataDirectory(const char *directory) {
1115     char *newDataDir;
1116     int32_t length;
1117 
1118     if(directory==NULL || *directory==0) {
1119         /* A small optimization to prevent the malloc and copy when the
1120         shared library is used, and this is a way to make sure that NULL
1121         is never returned.
1122         */
1123         newDataDir = (char *)"";
1124     }
1125     else {
1126         length=(int32_t)uprv_strlen(directory);
1127         newDataDir = (char *)uprv_malloc(length + 2);
1128         /* Exit out if newDataDir could not be created. */
1129         if (newDataDir == NULL) {
1130             return;
1131         }
1132         uprv_strcpy(newDataDir, directory);
1133 
1134 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1135         {
1136             char *p;
1137             while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1138                 *p = U_FILE_SEP_CHAR;
1139             }
1140         }
1141 #endif
1142     }
1143 
1144     umtx_lock(NULL);
1145     if (gDataDirectory && *gDataDirectory) {
1146         uprv_free(gDataDirectory);
1147     }
1148     gDataDirectory = newDataDir;
1149     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1150     umtx_unlock(NULL);
1151 }
1152 
1153 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1154 uprv_pathIsAbsolute(const char *path)
1155 {
1156   if(!path || !*path) {
1157     return FALSE;
1158   }
1159 
1160   if(*path == U_FILE_SEP_CHAR) {
1161     return TRUE;
1162   }
1163 
1164 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1165   if(*path == U_FILE_ALT_SEP_CHAR) {
1166     return TRUE;
1167   }
1168 #endif
1169 
1170 #if defined(U_WINDOWS)
1171   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1172        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1173       path[1] == ':' ) {
1174     return TRUE;
1175   }
1176 #endif
1177 
1178   return FALSE;
1179 }
1180 
1181 /* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1182    until some client wrapper makefiles are updated */
1183 #if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1184 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1185 #  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1186 # endif
1187 #endif
1188 
1189 U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)1190 u_getDataDirectory(void) {
1191     const char *path = NULL;
1192 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1193     char datadir_path_buffer[PATH_MAX];
1194 #endif
1195 
1196     /* if we have the directory, then return it immediately */
1197     UMTX_CHECK(NULL, gDataDirectory, path);
1198 
1199     if(path) {
1200         return path;
1201     }
1202 
1203     /*
1204     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1205     override ICU's data with the ICU_DATA environment variable. This prevents
1206     problems where multiple custom copies of ICU's specific version of data
1207     are installed on a system. Either the application must define the data
1208     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1209     ICU, set the data with udata_setCommonData or trust that all of the
1210     required data is contained in ICU's data library that contains
1211     the entry point defined by U_ICUDATA_ENTRY_POINT.
1212 
1213     There may also be some platforms where environment variables
1214     are not allowed.
1215     */
1216 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1217     /* First try to get the environment variable */
1218     path=getenv("ICU_DATA");
1219 #   endif
1220 
1221     /* ICU_DATA_DIR may be set as a compile option.
1222      * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1223      * and is used only when data is built in archive mode eliminating the need
1224      * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1225      * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1226      * set their own path.
1227      */
1228 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1229     if(path==NULL || *path==0) {
1230 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1231         const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1232 # endif
1233 # ifdef ICU_DATA_DIR
1234         path=ICU_DATA_DIR;
1235 # else
1236         path=U_ICU_DATA_DEFAULT_DIR;
1237 # endif
1238 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1239         if (prefix != NULL) {
1240             snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1241             path=datadir_path_buffer;
1242         }
1243 # endif
1244     }
1245 #endif
1246 
1247     if(path==NULL) {
1248         /* It looks really bad, set it to something. */
1249         path = "";
1250     }
1251 
1252     u_setDataDirectory(path);
1253     return gDataDirectory;
1254 }
1255 
1256 
1257 
1258 
1259 
1260 /* Macintosh-specific locale information ------------------------------------ */
1261 #ifdef XP_MAC
1262 
/*
 * One row of the classic Mac OS locale lookup table.  A row matches when
 * each of its four keys is either MAC_LC_MAGIC_NUMBER (wildcard) or equal to
 * the corresponding value probed from the system; the first matching row
 * supplies the POSIX locale ID.
 */
typedef struct {
    int32_t script;       /* script code key */
    int32_t region;       /* region code key */
    int32_t lang;         /* language code key */
    int32_t date_region;  /* key compared with the high byte of intl1Vers */
    const char* posixID;  /* resulting POSIX-style locale ID */
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5  /* wildcard key: matches any probed value */
#define MAC_LC_INIT_NUMBER -9   /* initial value of a key that was not probed */

static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },
    /* United States*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },
    /* France*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },
    /* Great Britain*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },
    /* Germany*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },
    /* Italy*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },
    /* Netherlands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },
    /* French for Belgium or Luxembourg*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },
    /* Sweden*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },
    /* Denmark*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" },
    /* Portugal*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" },
    /* French Canada*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    /* Israel (was "is_IS", which is Icelandic/Iceland)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" },
    /* Japan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" },
    /* Australia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" },
    /* the Arabic world (?)*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" },
    /* Finland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" },
    /* French for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" },
    /* German for Switzerland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" },
    /* Greece*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" },
    /* Iceland ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
    /* Malta ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
    /* Cyprus ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" },
    /* Turkey ===*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" },
    /* Croatian system for Yugoslavia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
    /* Hindi system for India*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
    /* Pakistan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" },
    /* Lithuania*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" },
    /* Poland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" },
    /* Hungary*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" },
    /* Estonia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" },
    /* Latvia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
    /* Lapland  [Ask Rich for the data. HS]*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
    /* Faeroe Islands*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" },
    /* Iran*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" },
    /* Russia*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" },
    /* Ireland*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" },
    /* Korea*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" },
    /* People's Republic of China*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" },
    /* Taiwan*/
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" },
    /* Thailand*/

    /* fallback is en_US: every key is a wildcard, so this row always matches */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1362 
1363 #endif
1364 
1365 #if U_POSIX_LOCALE
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * The current setlocale() value is used unless it is missing, "C" or
 * "POSIX"; in that case the environment is consulted in POSIX precedence
 * order: LC_ALL, then the category's own variable, then LANG.  An
 * environment variable that is set to the empty string is treated as unset.
 * When nothing usable is found, "en_US_POSIX" is returned; the result is
 * never NULL or empty.
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;
    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * On Solaris two different calls to setlocale can result in
        * different values. Only get this value once.
        *
        * We must check this first because an application can set this.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        posixID = setlocale(category, NULL);
        if ((posixID == 0)
            || (uprv_strcmp("C", posixID) == 0)
            || (uprv_strcmp("POSIX", posixID) == 0))
        {
            /* Maybe we got some garbage.  Try something more reasonable.
             * An empty variable counts as unset, so keep falling through. */
            posixID = getenv("LC_ALL");
            if (posixID == 0 || *posixID == 0) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if (posixID == 0 || *posixID == 0) {
                    posixID = getenv("LANG");
                }
            }
        }
    }
    if ((posixID==0)
        || (*posixID == 0)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
    }
    return posixID;
}
1416 
/* Returns the raw POSIX id of the default locale, as reported for the
 * LC_MESSAGES category by uprv_getPOSIXIDForCategory(). The value is looked
 * up on the first call only and remembered for every later call.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1428 
/* Returns the raw POSIX id used for default codepage detection, as reported
 * for the LC_CTYPE category by uprv_getPOSIXIDForCategory(). The value is
 * looked up on the first call only and remembered for every later call.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1440 #endif
1441 
1442 /* NOTE: The caller should handle thread safety */
1443 U_CAPI const char* U_EXPORT2
uprv_getDefaultLocaleID()1444 uprv_getDefaultLocaleID()
1445 {
1446 #if U_POSIX_LOCALE
1447 /*
1448   Note that:  (a '!' means the ID is improper somehow)
1449      LC_ALL  ---->     default_loc          codepage
1450 --------------------------------------------------------
1451      ab.CD             ab                   CD
1452      ab@CD             ab__CD               -
1453      ab@CD.EF          ab__CD               EF
1454 
1455      ab_CD.EF@GH       ab_CD_GH             EF
1456 
1457 Some 'improper' ways to do the same as above:
1458   !  ab_CD@GH.EF       ab_CD_GH             EF
1459   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1460   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1461 
1462      _CD@GH            _CD_GH               -
1463      _CD.EF@GH         _CD_GH               EF
1464 
1465 The variant cannot have dots in it.
1466 The 'rightmost' variant (@xxx) wins.
1467 The leftmost codepage (.xxx) wins.
1468 */
1469     char *correctedPOSIXLocale = 0;
1470     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1471     const char *p;
1472     const char *q;
1473     int32_t len;
1474 
1475     /* Format: (no spaces)
1476     ll [ _CC ] [ . MM ] [ @ VV]
1477 
1478       l = lang, C = ctry, M = charmap, V = variant
1479     */
1480 
1481     if (gCorrectedPOSIXLocale != NULL) {
1482         return gCorrectedPOSIXLocale;
1483     }
1484 
1485     if ((p = uprv_strchr(posixID, '.')) != NULL) {
1486         /* assume new locale can't be larger than old one? */
1487         correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1488         /* Exit on memory allocation error. */
1489         if (correctedPOSIXLocale == NULL) {
1490             return NULL;
1491         }
1492         uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1493         correctedPOSIXLocale[p-posixID] = 0;
1494 
1495         /* do not copy after the @ */
1496         if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1497             correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1498         }
1499     }
1500 
1501     /* Note that we scan the *uncorrected* ID. */
1502     if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1503         if (correctedPOSIXLocale == NULL) {
1504             correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1505             /* Exit on memory allocation error. */
1506             if (correctedPOSIXLocale == NULL) {
1507                 return NULL;
1508             }
1509             uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1510             correctedPOSIXLocale[p-posixID] = 0;
1511         }
1512         p++;
1513 
1514         /* Take care of any special cases here.. */
1515         if (!uprv_strcmp(p, "nynorsk")) {
1516             p = "NY";
1517             /* Don't worry about no__NY. In practice, it won't appear. */
1518         }
1519 
1520         if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1521             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1522         }
1523         else {
1524             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1525         }
1526 
1527         if ((q = uprv_strchr(p, '.')) != NULL) {
1528             /* How big will the resulting string be? */
1529             len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1530             uprv_strncat(correctedPOSIXLocale, p, q-p);
1531             correctedPOSIXLocale[len] = 0;
1532         }
1533         else {
1534             /* Anything following the @ sign */
1535             uprv_strcat(correctedPOSIXLocale, p);
1536         }
1537 
1538         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1539          * How about 'russian' -> 'ru'?
1540          * Many of the other locales using ISO codes will be handled by the
1541          * canonicalization functions in uloc_getDefault.
1542          */
1543     }
1544 
1545     /* Was a correction made? */
1546     if (correctedPOSIXLocale != NULL) {
1547         posixID = correctedPOSIXLocale;
1548     }
1549     else {
1550         /* copy it, just in case the original pointer goes away.  See j2395 */
1551         correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1552         /* Exit on memory allocation error. */
1553         if (correctedPOSIXLocale == NULL) {
1554             return NULL;
1555         }
1556         posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1557     }
1558 
1559     if (gCorrectedPOSIXLocale == NULL) {
1560         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1561         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1562         correctedPOSIXLocale = NULL;
1563     }
1564 
1565     if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1566         uprv_free(correctedPOSIXLocale);
1567     }
1568 
1569     return posixID;
1570 
1571 #elif defined(U_WINDOWS) || defined(U_MINGW)
1572     UErrorCode status = U_ZERO_ERROR;
1573     LCID id = GetThreadLocale();
1574     const char* locID = uprv_convertToPosix(id, &status);
1575 
1576     if (U_FAILURE(status)) {
1577         locID = "en_US";
1578     }
1579     return locID;
1580 
1581 #elif defined(XP_MAC)
1582     int32_t script = MAC_LC_INIT_NUMBER;
1583     /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1584     int32_t region = MAC_LC_INIT_NUMBER;
1585     /* = GetScriptManagerVariable(smRegionCode);*/
1586     int32_t lang = MAC_LC_INIT_NUMBER;
1587     /* = GetScriptManagerVariable(smScriptLang);*/
1588     int32_t date_region = MAC_LC_INIT_NUMBER;
1589     const char* posixID = 0;
1590     int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1591     int32_t i;
1592     Intl1Hndl ih;
1593 
1594     ih = (Intl1Hndl) GetIntlResource(1);
1595     if (ih)
1596         date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1597 
1598     for (i = 0; i < count; i++) {
1599         if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1600              || (mac_lc_recs[i].script == script))
1601             && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1602              || (mac_lc_recs[i].region == region))
1603             && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1604              || (mac_lc_recs[i].lang == lang))
1605             && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1606              || (mac_lc_recs[i].date_region == date_region))
1607             )
1608         {
1609             posixID = mac_lc_recs[i].posixID;
1610             break;
1611         }
1612     }
1613 
1614     return posixID;
1615 
1616 #elif defined(OS400)
1617     /* locales are process scoped and are by definition thread safe */
1618     static char correctedLocale[64];
1619     const  char *localeID = getenv("LC_ALL");
1620            char *p;
1621 
1622     if (localeID == NULL)
1623         localeID = getenv("LANG");
1624     if (localeID == NULL)
1625         localeID = setlocale(LC_ALL, NULL);
1626     /* Make sure we have something... */
1627     if (localeID == NULL)
1628         return "en_US_POSIX";
1629 
1630     /* Extract the locale name from the path. */
1631     if((p = uprv_strrchr(localeID, '/')) != NULL)
1632     {
1633         /* Increment p to start of locale name. */
1634         p++;
1635         localeID = p;
1636     }
1637 
1638     /* Copy to work location. */
1639     uprv_strcpy(correctedLocale, localeID);
1640 
1641     /* Strip off the '.locale' extension. */
1642     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1643         *p = 0;
1644     }
1645 
1646     /* Upper case the locale name. */
1647     T_CString_toUpperCase(correctedLocale);
1648 
1649     /* See if we are using the POSIX locale.  Any of the
1650     * following are equivalent and use the same QLGPGCMA
1651     * (POSIX) locale.
1652     * QLGPGCMA2 means UCS2
1653     * QLGPGCMA_4 means UTF-32
1654     * QLGPGCMA_8 means UTF-8
1655     */
1656     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1657         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1658         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1659     {
1660         uprv_strcpy(correctedLocale, "en_US_POSIX");
1661     }
1662     else
1663     {
1664         int16_t LocaleLen;
1665 
1666         /* Lower case the lang portion. */
1667         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1668         {
1669             *p = uprv_tolower(*p);
1670         }
1671 
1672         /* Adjust for Euro.  After '_E' add 'URO'. */
1673         LocaleLen = uprv_strlen(correctedLocale);
1674         if (correctedLocale[LocaleLen - 2] == '_' &&
1675             correctedLocale[LocaleLen - 1] == 'E')
1676         {
1677             uprv_strcat(correctedLocale, "URO");
1678         }
1679 
1680         /* If using Lotus-based locale then convert to
1681          * equivalent non Lotus.
1682          */
1683         else if (correctedLocale[LocaleLen - 2] == '_' &&
1684             correctedLocale[LocaleLen - 1] == 'L')
1685         {
1686             correctedLocale[LocaleLen - 2] = 0;
1687         }
1688 
1689         /* There are separate simplified and traditional
1690          * locales called zh_HK_S and zh_HK_T.
1691          */
1692         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1693         {
1694             uprv_strcpy(correctedLocale, "zh_HK");
1695         }
1696 
1697         /* A special zh_CN_GBK locale...
1698         */
1699         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1700         {
1701             uprv_strcpy(correctedLocale, "zh_CN");
1702         }
1703 
1704     }
1705 
1706     return correctedLocale;
1707 #endif
1708 
1709 }
1710 
1711 #if !U_CHARSET_IS_UTF8
1712 #if U_POSIX_LOCALE
/**
 * Resolve charset names that mean different things on different platforms.
 *
 * A platform may report one charset name when it really means another one.
 * This function remaps such names so that they are compatible with ICU.
 * Only conflicting/ambiguous aliases should be resolved here; before adding
 * anything to this function, please consider adding unique names to the ICU
 * alias table in the data directory instead.
 *
 * @param locale POSIX locale ID the charset name belongs to. NULL and ""
 *               are handled the same way (no locale).
 * @param name   Charset name reported by the platform; may be NULL.
 * @return NULL if name is NULL, or if the name is (still) empty after the
 *         platform-specific remapping; otherwise either name itself or a
 *         string literal with the replacement charset name.
 */
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (name == NULL) {
        return NULL;
    }
    if (locale != NULL && *locale == 0) {
        /* Make sure that an empty locale is handled the same way as none. */
        locale = NULL;
    }

#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    } else if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name; the locale disambiguates. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            name = "EUC-CN";
        } else if (uprv_strcmp(locale, "zh_TW") == 0) {
            name = "EUC-TW";
        } else if (uprv_strcmp(locale, "ko_KR") == 0) {
            name = "EUC-KR";
        }
    } else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
         * ibm-954 is the best match.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        name = "eucjis";
    } else if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library
         * routines treat it as if it was ISO-8859-1 instead of US-ASCII(646).
         */
        name = "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && *name == 0) {
        /*
         * No locale was specified, and an empty name was passed in.
         * This usually indicates that nl_langinfo didn't return valid
         * information. Mac OS X uses UTF-8 by default (especially the locale
         * data and console).
         */
        name = "UTF-8";
    } else if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        name = "EUC-KR";
    } else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
        /* For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. */
        name = "UTF-8";
    }
#elif defined(U_BSD)
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        name = "EUC-KR";
    }
#elif defined(U_HPUX)
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        name = "hkbig5";
    } else if (uprv_strcmp(name, "eucJP") == 0) {
        /*
         * ibm-1350 is the best match, but unavailable.
         * ibm-954 is mostly a superset of ibm-1350.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name; the locale disambiguates. */
        if (uprv_strcmp(locale, "korean") == 0) {
            name = "EUC-KR";
        } else if (uprv_strcmp(locale, "japanese") == 0) {
            /* Same reasoning as for "eucjp" below. */
            name = "eucjis";
        }
    } else if (uprv_strcmp(name, "eucjp") == 0) {
        /*
         * ibm-1350 is the best match, but unavailable.
         * ibm-954 is mostly a superset of ibm-1350.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        name = "eucjis";
    } else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
        /* For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. */
        name = "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care
     * of it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif

    /* An empty name is reported to the caller as "no name". */
    return (*name != 0) ? name : NULL;
}
1841 
1842 static const char*
getCodepageFromPOSIXID(const char * localeName,char * buffer,int32_t buffCapacity)1843 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1844 {
1845     char localeBuf[100];
1846     const char *name = NULL;
1847     char *variant = NULL;
1848 
1849     if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1850         size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1851         uprv_strncpy(localeBuf, localeName, localeCapacity);
1852         localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1853         name = uprv_strncpy(buffer, name+1, buffCapacity);
1854         buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1855         if ((variant = (uprv_strchr(name, '@'))) != NULL) {
1856             *variant = 0;
1857         }
1858         name = remapPlatformDependentCodepage(localeBuf, name);
1859     }
1860     return name;
1861 }
1862 #endif
1863 
/**
 * Determine the name of the platform's default codepage.
 *
 * Exactly one platform-specific branch is compiled in:
 *  - OS400:   "ibm-<ccsid>" from the job information (default ibm-37).
 *  - OS390:   nl_langinfo(CODESET) followed by UCNV_SWAP_LFNL_OPTION_STRING.
 *  - XP_MAC:  "macintosh".
 *  - Windows: "windows-<ANSI code page>".
 *  - POSIX:   nl_langinfo() when available, then the codeset embedded in the
 *             POSIX locale ID, then "US-ASCII".
 *  - other:   "US-ASCII".
 *
 * @return A NUL-terminated codepage name: a string literal or a pointer into
 *         a function-local static buffer that is overwritten by each call.
 */
static const char*
int_getDefaultCodepage(void)
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned: format it with %u rather than %d. */
    sprintf(codepage, "ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* Room left for the codeset name once the option suffix is accounted for. */
    const size_t nameCapacity = 63 - strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    strncpy(codepage, nl_langinfo(CODESET), nameCapacity);
    /* strncpy() does not terminate on truncation; strcat() needs a NUL. */
    codepage[nameCapacity] = 0;
    strcat(codepage, UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[63] = 0; /* NUL terminate */

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* GetACP() returns an unsigned value: format it with %u rather than %d. */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    localeName = uprv_getPOSIXIDForDefaultCodepage();
    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII.
         */
        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            codeset = remapPlatformDependentCodepage(localeName, codeset);
        } else
#endif
        {
            codeset = remapPlatformDependentCodepage(NULL, codeset);
        }

        if (codeset != NULL) {
            /* Copy into the static buffer: the source may be overwritten later. */
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1959 
1960 
1961 U_CAPI const char*  U_EXPORT2
uprv_getDefaultCodepage()1962 uprv_getDefaultCodepage()
1963 {
1964     static char const  *name = NULL;
1965     umtx_lock(NULL);
1966     if (name == NULL) {
1967         name = int_getDefaultCodepage();
1968     }
1969     umtx_unlock(NULL);
1970     return name;
1971 }
1972 #endif  /* !U_CHARSET_IS_UTF8 */
1973 
1974 
1975 /* end of platform-specific implementation -------------- */
1976 
1977 /* version handling --------------------------------------------------------- */
1978 
1979 U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray,const char * versionString)1980 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1981     char *end;
1982     uint16_t part=0;
1983 
1984     if(versionArray==NULL) {
1985         return;
1986     }
1987 
1988     if(versionString!=NULL) {
1989         for(;;) {
1990             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1991             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1992                 break;
1993             }
1994             versionString=end+1;
1995         }
1996     }
1997 
1998     while(part<U_MAX_VERSION_LENGTH) {
1999         versionArray[part++]=0;
2000     }
2001 }
2002 
2003 U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray,const UChar * versionString)2004 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2005     if(versionArray!=NULL && versionString!=NULL) {
2006         char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2007         int32_t len = u_strlen(versionString);
2008         if(len>U_MAX_VERSION_STRING_LENGTH) {
2009             len = U_MAX_VERSION_STRING_LENGTH;
2010         }
2011         u_UCharsToChars(versionString, versionChars, len);
2012         versionChars[len]=0;
2013         u_versionFromString(versionArray, versionChars);
2014     }
2015 }
2016 
2017 U_CAPI void U_EXPORT2
u_versionToString(UVersionInfo versionArray,char * versionString)2018 u_versionToString(UVersionInfo versionArray, char *versionString) {
2019     uint16_t count, part;
2020     uint8_t field;
2021 
2022     if(versionString==NULL) {
2023         return;
2024     }
2025 
2026     if(versionArray==NULL) {
2027         versionString[0]=0;
2028         return;
2029     }
2030 
2031     /* count how many fields need to be written */
2032     for(count=4; count>0 && versionArray[count-1]==0; --count) {
2033     }
2034 
2035     if(count <= 1) {
2036         count = 2;
2037     }
2038 
2039     /* write the first part */
2040     /* write the decimal field value */
2041     field=versionArray[0];
2042     if(field>=100) {
2043         *versionString++=(char)('0'+field/100);
2044         field%=100;
2045     }
2046     if(field>=10) {
2047         *versionString++=(char)('0'+field/10);
2048         field%=10;
2049     }
2050     *versionString++=(char)('0'+field);
2051 
2052     /* write the following parts */
2053     for(part=1; part<count; ++part) {
2054         /* write a dot first */
2055         *versionString++=U_VERSION_DELIMITER;
2056 
2057         /* write the decimal field value */
2058         field=versionArray[part];
2059         if(field>=100) {
2060             *versionString++=(char)('0'+field/100);
2061             field%=100;
2062         }
2063         if(field>=10) {
2064             *versionString++=(char)('0'+field/10);
2065             field%=10;
2066         }
2067         *versionString++=(char)('0'+field);
2068     }
2069 
2070     /* NUL-terminate */
2071     *versionString=0;
2072 }
2073 
2074 U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray)2075 u_getVersion(UVersionInfo versionArray) {
2076     u_versionFromString(versionArray, U_ICU_VERSION);
2077 }
2078 
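/**
 * Dynamic library loading: uprv_dl_open(), uprv_dl_close() and
 * uprv_dlsym_func(). Which implementation is compiled below depends on
 * configuration macros (icucfg.h dependent code).
 */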
2082 
2083 #if U_ENABLE_DYLOAD
2084 
2085 #if defined(U_CHECK_DYLOAD)
2086 
2087 #if defined(HAVE_DLOPEN)
2088 
2089 #ifdef HAVE_DLFCN_H
2090 #ifdef __MVS__
2091 #ifndef __SUSV3
2092 #define __SUSV3 1
2093 #endif
2094 #endif
2095 #include <dlfcn.h>
2096 #endif
2097 
2098 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2099 uprv_dl_open(const char *libName, UErrorCode *status) {
2100   void *ret = NULL;
2101   if(U_FAILURE(*status)) return ret;
2102   ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2103   if(ret==NULL) {
2104 #ifndef U_TRACE_DYLOAD
2105     perror("dlopen");
2106 #endif
2107     *status = U_MISSING_RESOURCE_ERROR;
2108   }
2109   return ret;
2110 }
2111 
2112 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2113 uprv_dl_close(void *lib, UErrorCode *status) {
2114   if(U_FAILURE(*status)) return;
2115   dlclose(lib);
2116 }
2117 
2118 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2119 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2120   union {
2121       void* voidPtr;
2122       UVoidFunction* voidFunc;
2123   } ret;
2124   ret.voidPtr = NULL;
2125   if(U_FAILURE(*status)) return NULL;
2126   /*
2127    * ISO forbids the following cast, but it's needed for dlsym.
2128    *  See: http://pubs.opengroup.org/onlinepubs/009695399/functions/dlsym.html
2129    *  See: http://www.trilithium.com/johan/2004/12/problem-with-dlsym/
2130    */
2131   ret.voidPtr = dlsym(lib, sym);
2132   if(ret.voidPtr == NULL) {
2133     *status = U_MISSING_RESOURCE_ERROR;
2134   }
2135   return ret.voidFunc;
2136 }
2137 
2138 #else
2139 
/* Stub implementation used when HAVE_DLOPEN is not defined: nothing can be loaded. */
2141 
2142 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2143 uprv_dl_open(const char *libName, UErrorCode *status) {
2144   if(U_FAILURE(*status)) return NULL;
2145   *status = U_UNSUPPORTED_ERROR;
2146   return NULL;
2147 }
2148 
2149 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2150 uprv_dl_close(void *lib, UErrorCode *status) {
2151   if(U_FAILURE(*status)) return;
2152   *status = U_UNSUPPORTED_ERROR;
2153   return;
2154 }
2155 
2156 
2157 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2158 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2159   if(U_SUCCESS(*status)) {
2160     *status = U_UNSUPPORTED_ERROR;
2161   }
2162   return (UVoidFunction*)NULL;
2163 }
2164 
2165 
2166 
2167 #endif
2168 
2169 #elif defined U_WINDOWS
2170 
2171 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2172 uprv_dl_open(const char *libName, UErrorCode *status) {
2173   HMODULE lib = NULL;
2174 
2175   if(U_FAILURE(*status)) return NULL;
2176 
2177   lib = LoadLibraryA(libName);
2178 
2179   if(lib==NULL) {
2180     *status = U_MISSING_RESOURCE_ERROR;
2181   }
2182 
2183   return (void*)lib;
2184 }
2185 
2186 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2187 uprv_dl_close(void *lib, UErrorCode *status) {
2188   HMODULE handle = (HMODULE)lib;
2189   if(U_FAILURE(*status)) return;
2190 
2191   FreeLibrary(handle);
2192 
2193   return;
2194 }
2195 
2196 
2197 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2198 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2199   HMODULE handle = (HMODULE)lib;
2200   UVoidFunction* addr = NULL;
2201 
2202   if(U_FAILURE(*status) || lib==NULL) return NULL;
2203 
2204   addr = (UVoidFunction*)GetProcAddress(handle, sym);
2205 
2206   if(addr==NULL) {
2207     DWORD lastError = GetLastError();
2208     if(lastError == ERROR_PROC_NOT_FOUND) {
2209       *status = U_MISSING_RESOURCE_ERROR;
2210     } else {
2211       *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2212     }
2213   }
2214 
2215   return addr;
2216 }
2217 
2218 
2219 #else
2220 
/* Neither U_CHECK_DYLOAD nor U_WINDOWS is defined: no dynamic loading is available. */
2222 
2223 U_INTERNAL void * U_EXPORT2
uprv_dl_open(const char * libName,UErrorCode * status)2224 uprv_dl_open(const char *libName, UErrorCode *status) {
2225     if(U_FAILURE(*status)) return NULL;
2226     *status = U_UNSUPPORTED_ERROR;
2227     return NULL;
2228 }
2229 
2230 U_INTERNAL void U_EXPORT2
uprv_dl_close(void * lib,UErrorCode * status)2231 uprv_dl_close(void *lib, UErrorCode *status) {
2232     if(U_FAILURE(*status)) return;
2233     *status = U_UNSUPPORTED_ERROR;
2234     return;
2235 }
2236 
2237 
2238 U_INTERNAL UVoidFunction* U_EXPORT2
uprv_dlsym_func(void * lib,const char * sym,UErrorCode * status)2239 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2240   if(U_SUCCESS(*status)) {
2241     *status = U_UNSUPPORTED_ERROR;
2242   }
2243   return (UVoidFunction*)NULL;
2244 }
2245 
2246 
2247 #endif
2248 
2249 #endif /* U_ENABLE_DYLOAD */
2250 
2251 /*
2252  * Hey, Emacs, please set the following:
2253  *
2254  * Local Variables:
2255  * indent-tabs-mode: nil
2256  * End:
2257  *
2258  */
2259