1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10 *
11 * Date Name Description
12 * 04/14/97 aliu Creation.
13 * 04/24/97 aliu Added getDefaultDataDirectory() and
14 * getDefaultLocaleID().
15 * 04/28/97 aliu Rewritten to assume Unix and apply general methods
16 * for assumed case. Non-UNIX platforms must be
17 * special-cased. Rewrote numeric methods dealing
18 * with NaN and Infinity to be platform independent
19 * over all IEEE 754 platforms.
20 * 05/13/97 aliu Restored sign of timezone
21 * (semantics are hours West of GMT)
22 * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23 * nextDouble..
24 * 07/22/98 stephen Added remainder, max, min, trunc
25 * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
26 * 08/24/98 stephen Added longBitsFromDouble
27 * 09/08/98 stephen Minor changes for Mac Port
28 * 03/02/99 stephen Removed openFile(). Added AS400 support.
29 * Fixed EBCDIC tables
30 * 04/15/99 stephen Converted to C.
31 * 06/28/99 stephen Removed mutex locking in u_isBigEndian().
32 * 08/04/99 jeffrey R. Added OS/2 changes
33 * 11/15/99 helena Integrated S/390 IEEE support.
34 * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
35 * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
36 ******************************************************************************
37 */
38
39 /* Define _XOPEN_SOURCE for Solaris and friends. */
40 /* NetBSD needs it to be >= 4 */
41 #if !defined(_XOPEN_SOURCE)
42 #if __STDC_VERSION__ >= 199901L
43 /* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
44 #define _XOPEN_SOURCE 600
45 #else
46 #define _XOPEN_SOURCE 4
47 #endif
48 #endif
49
50 /* Make sure things like readlink and such functions work.
51 Poorly upgraded Solaris machines can't have this defined.
52 Cleanly installed Solaris can use this #define.
53 */
54 #if !defined(_XOPEN_SOURCE_EXTENDED) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L)
55 #define _XOPEN_SOURCE_EXTENDED 1
56 #endif
57
58 /* include ICU headers */
59 #include "unicode/utypes.h"
60 #include "unicode/putil.h"
61 #include "unicode/ustring.h"
62 #include "putilimp.h"
63 #include "uassert.h"
64 #include "umutex.h"
65 #include "cmemory.h"
66 #include "cstring.h"
67 #include "locmap.h"
68 #include "ucln_cmn.h"
69
70 /* Include standard headers. */
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <math.h>
75 #include <locale.h>
76 #include <float.h>
77 #include <time.h>
78
79 /* include system headers */
80 #ifdef U_WINDOWS
81 # define WIN32_LEAN_AND_MEAN
82 # define VC_EXTRALEAN
83 # define NOUSER
84 # define NOSERVICE
85 # define NOIME
86 # define NOMCX
87 # include <windows.h>
88 # include "wintz.h"
89 #elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
90 /* tzset isn't defined in strict ANSI on Cygwin. */
91 # undef __STRICT_ANSI__
92 #elif defined(OS400)
93 # include <float.h>
94 # include <qusec.h> /* error code structure */
95 # include <qusrjobi.h>
96 # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
97 # include <mih/testptr.h> /* For uprv_maximumPtr */
98 #elif defined(XP_MAC)
99 # include <Files.h>
100 # include <IntlResources.h>
101 # include <Script.h>
102 # include <Folders.h>
103 # include <MacTypes.h>
104 # include <TextUtils.h>
105 # define ICU_NO_USER_DATA_OVERRIDE 1
106 #elif defined(OS390)
107 #include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
108 #elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
109 #include <limits.h>
110 #include <unistd.h>
111 #elif defined(U_QNX)
112 #include <sys/neutrino.h>
113 #endif
114
115 #ifndef U_WINDOWS
116 #include <sys/time.h>
117 #endif
118
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
124
125 #if U_HAVE_NL_LANGINFO_CODESET
126 #include <langinfo.h>
127 #endif
128
129 /* Define the extension for data files, again... */
130 #define DATA_TYPE "dat"
131
132 /* Leave this copyright notice here! */
133 static const char copyright[] = U_COPYRIGHT_STRING;
134
135 /* floating point implementations ------------------------------------------- */
136
137 /* We return QNAN rather than SNAN*/
138 #define SIGN 0x80000000U
139
140 /* Make it easy to define certain types of constants */
/*
 * Overlays a 64-bit integer and a double so that a double constant can be
 * written as a raw bit pattern, and so that the bits of a double can be
 * inspected as an integer.
 */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern returned by uprv_getNaN() on IEEE 754 builds (a quiet NaN rather than a signaling one). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity; the NaN/infinity tests compare sign-masked bits against it. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
147
148 /*---------------------------------------------------------------------------
149 Platform utilities
150 Our general strategy is to assume we're on a POSIX platform. Platforms which
151 are non-POSIX must declare themselves so. The default POSIX implementation
152 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
153 functions).
154 ---------------------------------------------------------------------------*/
155
156 #if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
157 # undef U_POSIX_LOCALE
158 #else
159 # define U_POSIX_LOCALE 1
160 #endif
161
162 /*
163 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
164 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
165 */
166 #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of the double that d
 * points to. Where those bytes sit depends on the byte order selected by
 * U_IS_BIG_ENDIAN.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Most significant byte is stored first. */
    return firstByte;
#else
    /* Most significant byte is stored last: step to the end, then back n bytes. */
    return firstByte + sizeof(double) - n;
#endif
}
176 #endif
177
/*
 * Returns a pointer to the n least significant bytes of the double that d
 * points to. Where those bytes sit depends on the byte order selected by
 * U_IS_BIG_ENDIAN.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Least significant byte is stored last: step to the end, then back n bytes. */
    return firstByte + sizeof(double) - n;
#else
    /* Least significant byte is stored first. */
    return firstByte;
#endif
}
187
188 #if defined(U_WINDOWS)
/*
 * Overlays a Windows FILETIME with a 64-bit integer so that the value filled
 * in by GetSystemTimeAsFileTime() can be used in integer arithmetic
 * (see uprv_getUTCtime()).
 */
typedef union {
    int64_t int64;     /* the same bits read as one 64-bit count */
    FILETIME fileTime; /* the structure handed to the Windows API */
} FileTimeConversion; /* This is like a ULARGE_INTEGER */
193
194 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
195 #define EPOCH_BIAS INT64_C(116444736000000000)
196 #define HECTONANOSECOND_PER_MILLISECOND 10000
197
198 #endif
199
200 /*---------------------------------------------------------------------------
201 Universal Implementations
202 These are designed to work on all platforms. Try these, and if they
203 don't work on your platform, then special case your platform with new
204 implementations.
205 ---------------------------------------------------------------------------*/
206
207 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
208 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()209 uprv_getUTCtime()
210 {
211 #ifdef XP_MAC
212 time_t t, t1, t2;
213 struct tm tmrec;
214
215 uprv_memset( &tmrec, 0, sizeof(tmrec) );
216 tmrec.tm_year = 70;
217 tmrec.tm_mon = 0;
218 tmrec.tm_mday = 1;
219 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/
220
221 time(&t);
222 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
223 t2 = mktime(&tmrec); /* seconds of current GMT*/
224 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/
225 #elif defined(U_WINDOWS)
226
227 FileTimeConversion winTime;
228 GetSystemTimeAsFileTime(&winTime.fileTime);
229 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
230 #else
231 /*
232 struct timeval posixTime;
233 gettimeofday(&posixTime, NULL);
234 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
235 */
236 time_t epochtime;
237 time(&epochtime);
238 return (UDate)epochtime * U_MILLIS_PER_SECOND;
239 #endif
240 }
241
242 /*-----------------------------------------------------------------------------
243 IEEE 754
244 These methods detect and return NaN and infinity values for doubles
245 conforming to IEEE 754. Platforms which support this standard include X86,
246 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
247 If this doesn't work on your platform, you have non-IEEE floating-point, and
248 will need to code your own versions. A naive implementation is to return 0.0
249 for getNaN and getInfinity, and false for isNaN and isInfinite.
250 ---------------------------------------------------------------------------*/
251
252 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)253 uprv_isNaN(double number)
254 {
255 #if IEEE_754
256 BitPatternConversion convertedNumber;
257 convertedNumber.d64 = number;
258 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
259 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
260
261 #elif defined(OS390)
262 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
263 sizeof(uint32_t));
264 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
265 sizeof(uint32_t));
266
267 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
268 (lowBits == 0x00000000L);
269
270 #else
271 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
272 /* you'll need to replace this default implementation with what's correct*/
273 /* for your platform.*/
274 return number != number;
275 #endif
276 }
277
278 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)279 uprv_isInfinite(double number)
280 {
281 #if IEEE_754
282 BitPatternConversion convertedNumber;
283 convertedNumber.d64 = number;
284 /* Infinity is exactly 0x7FF0000000000000U. */
285 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
286 #elif defined(OS390)
287 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
288 sizeof(uint32_t));
289 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
290 sizeof(uint32_t));
291
292 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
293
294 #else
295 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
296 /* value, you'll need to replace this default implementation with what's*/
297 /* correct for your platform.*/
298 return number == (2.0 * number);
299 #endif
300 }
301
302 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)303 uprv_isPositiveInfinity(double number)
304 {
305 #if IEEE_754 || defined(OS390)
306 return (UBool)(number > 0 && uprv_isInfinite(number));
307 #else
308 return uprv_isInfinite(number);
309 #endif
310 }
311
312 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)313 uprv_isNegativeInfinity(double number)
314 {
315 #if IEEE_754 || defined(OS390)
316 return (UBool)(number < 0 && uprv_isInfinite(number));
317
318 #else
319 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
320 sizeof(uint32_t));
321 return((highBits & SIGN) && uprv_isInfinite(number));
322
323 #endif
324 }
325
326 U_CAPI double U_EXPORT2
uprv_getNaN()327 uprv_getNaN()
328 {
329 #if IEEE_754 || defined(OS390)
330 return gNan.d64;
331 #else
332 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
333 /* you'll need to replace this default implementation with what's correct*/
334 /* for your platform.*/
335 return 0.0;
336 #endif
337 }
338
339 U_CAPI double U_EXPORT2
uprv_getInfinity()340 uprv_getInfinity()
341 {
342 #if IEEE_754 || defined(OS390)
343 return gInf.d64;
344 #else
345 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
346 /* value, you'll need to replace this default implementation with what's*/
347 /* correct for your platform.*/
348 return 0.0;
349 #endif
350 }
351
352 U_CAPI double U_EXPORT2
uprv_floor(double x)353 uprv_floor(double x)
354 {
355 return floor(x);
356 }
357
358 U_CAPI double U_EXPORT2
uprv_ceil(double x)359 uprv_ceil(double x)
360 {
361 return ceil(x);
362 }
363
364 U_CAPI double U_EXPORT2
uprv_round(double x)365 uprv_round(double x)
366 {
367 return uprv_floor(x + 0.5);
368 }
369
370 U_CAPI double U_EXPORT2
uprv_fabs(double x)371 uprv_fabs(double x)
372 {
373 return fabs(x);
374 }
375
376 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)377 uprv_modf(double x, double* y)
378 {
379 return modf(x, y);
380 }
381
382 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)383 uprv_fmod(double x, double y)
384 {
385 return fmod(x, y);
386 }
387
388 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)389 uprv_pow(double x, double y)
390 {
391 /* This is declared as "double pow(double x, double y)" */
392 return pow(x, y);
393 }
394
395 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)396 uprv_pow10(int32_t x)
397 {
398 return pow(10.0, (double)x);
399 }
400
401 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)402 uprv_fmax(double x, double y)
403 {
404 #if IEEE_754
405 int32_t lowBits;
406
407 /* first handle NaN*/
408 if(uprv_isNaN(x) || uprv_isNaN(y))
409 return uprv_getNaN();
410
411 /* check for -0 and 0*/
412 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
413 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
414 return y;
415
416 #endif
417
418 /* this should work for all flt point w/o NaN and Infpecial cases */
419 return (x > y ? x : y);
420 }
421
422 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)423 uprv_fmin(double x, double y)
424 {
425 #if IEEE_754
426 int32_t lowBits;
427
428 /* first handle NaN*/
429 if(uprv_isNaN(x) || uprv_isNaN(y))
430 return uprv_getNaN();
431
432 /* check for -0 and 0*/
433 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
434 if(x == 0.0 && y == 0.0 && (lowBits & SIGN))
435 return y;
436
437 #endif
438
439 /* this should work for all flt point w/o NaN and Inf special cases */
440 return (x > y ? y : x);
441 }
442
443 /**
444 * Truncates the given double.
445 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
446 * This is different than calling floor() or ceil():
447 * floor(3.3) = 3, floor(-3.3) = -4
448 * ceil(3.3) = 4, ceil(-3.3) = -3
449 */
450 U_CAPI double U_EXPORT2
uprv_trunc(double d)451 uprv_trunc(double d)
452 {
453 #if IEEE_754
454 int32_t lowBits;
455
456 /* handle error cases*/
457 if(uprv_isNaN(d))
458 return uprv_getNaN();
459 if(uprv_isInfinite(d))
460 return uprv_getInfinity();
461
462 lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
463 if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
464 return ceil(d);
465 else
466 return floor(d);
467
468 #else
469 return d >= 0 ? floor(d) : ceil(d);
470
471 #endif
472 }
473
474 /**
475 * Return the largest positive number that can be represented by an integer
476 * type of arbitrary bit length.
477 */
478 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)479 uprv_maxMantissa(void)
480 {
481 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
482 }
483
484 U_CAPI double U_EXPORT2
uprv_log(double d)485 uprv_log(double d)
486 {
487 return log(d);
488 }
489
490 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)491 uprv_maximumPtr(void * base)
492 {
493 #if defined(OS400)
494 /*
495 * With the provided function we should never be out of range of a given segment
496 * (a traditional/typical segment that is). Our segments have 5 bytes for the
497 * id and 3 bytes for the offset. The key is that the casting takes care of
498 * only retrieving the offset portion minus x1000. Hence, the smallest offset
499 * seen in a program is x001000 and when casted to an int would be 0.
500 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
501 *
502 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
503 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
504 * This function determines the activation based on the pointer that is passed in and
505 * calculates the appropriate maximum available size for
506 * each pointer type (TERASPACE and non-TERASPACE)
507 *
508 * Unlike other operating systems, the pointer model isn't determined at
509 * compile time on i5/OS.
510 */
511 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
512 /* if it is a TERASPACE pointer the max is 2GB - 4k */
513 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
514 }
515 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
516 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
517
518 #else
519 return U_MAX_PTR(base);
520 #endif
521 }
522
523 /*---------------------------------------------------------------------------
524 Platform-specific Implementations
525 Try these, and if they don't work on your platform, then special case your
526 platform with new implementations.
527 ---------------------------------------------------------------------------*/
528
529 /* Generic time zone layer -------------------------------------------------- */
530
531 /* Time zone utilities */
532 U_CAPI void U_EXPORT2
uprv_tzset()533 uprv_tzset()
534 {
535 #ifdef U_TZSET
536 U_TZSET();
537 #else
538 /* no initialization*/
539 #endif
540 }
541
542 U_CAPI int32_t U_EXPORT2
uprv_timezone()543 uprv_timezone()
544 {
545 #ifdef U_TIMEZONE
546 return U_TIMEZONE;
547 #else
548 time_t t, t1, t2;
549 struct tm tmrec;
550 UBool dst_checked;
551 int32_t tdiff = 0;
552
553 time(&t);
554 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
555 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
556 t1 = mktime(&tmrec); /* local time in seconds*/
557 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
558 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
559 tdiff = t2 - t1;
560 /* imitate NT behaviour, which returns same timezone offset to GMT for
561 winter and summer*/
562 if (dst_checked)
563 tdiff += 3600;
564 return tdiff;
565 #endif
566 }
567
568 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
569 some platforms need to have it declared here. */
570
571 #if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
572 /* RS6000 and others reject char **tzname. */
573 extern U_IMPORT char *U_TZNAME[];
574 #endif
575
576 #if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
577 /* These platforms are likely to use Olson timezone IDs. */
578 #define CHECK_LOCALTIME_LINK 1
579 #if defined(U_LINUX)
580 #define TZDEFAULT "/etc/localtime"
581 #define TZZONEINFO "/usr/share/zoneinfo/"
582 #else
583 #include <tzfile.h>
584 #define TZZONEINFO (TZDIR "/")
585 #endif
586 static char gTimeZoneBuffer[PATH_MAX];
587 static char *gTimeZoneBufferPtr = NULL;
588 #endif
589
590 #ifndef U_WINDOWS
591 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)592 static UBool isValidOlsonID(const char *id) {
593 int32_t idx = 0;
594
595 /* Determine if this is something like Iceland (Olson ID)
596 or AST4ADT (non-Olson ID) */
597 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
598 idx++;
599 }
600
601 /* If we went through the whole string, then it might be okay.
602 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
603 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
604 The rest of the time it could be an Olson ID. George */
605 return (UBool)(id[idx] == 0
606 || uprv_strcmp(id, "PST8PDT") == 0
607 || uprv_strcmp(id, "MST7MDT") == 0
608 || uprv_strcmp(id, "CST6CDT") == 0
609 || uprv_strcmp(id, "EST5EDT") == 0);
610 }
611 #endif
612
613 #if defined(U_TZNAME) && !defined(U_WINDOWS)
614
615 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/*
 * One row of OFFSET_ZONE_MAPPINGS: a combination of offset, daylight pattern
 * and zone abbreviations, together with the Olson ID it stands for.
 */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds; /* compared with uprv_timezone(); zones east of GMT are negative in the table */
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;     /* standard-time abbreviation, compared with U_TZNAME[0] */
    const char *dstID;     /* daylight-time abbreviation, compared with U_TZNAME[1] */
    const char *olsonID;   /* Olson ID returned when all four keys match */
} OffsetZoneMapping;
623
/*
 This list tries to disambiguate a set of abbreviated timezone IDs and offsets
 and maps it to an Olson ID.
 Before adding anything to this list, take a look at
 icu/source/tools/tzcode/tz.alias
 Sometimes no daylight savings (0) is important to define due to aliases.
 This list can be tested with icu/source/test/compat/tzone.pl
 More values could be added to daylightType to increase precision.

 Columns: offset in seconds, daylightType, standard abbreviation,
 daylight abbreviation, Olson ID. remapShortTimeZone() scans the rows in
 order and returns the first one whose four keys all match.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};
694
695 /*#define DEBUG_TZNAME*/
696
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)697 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
698 {
699 int32_t idx;
700 #ifdef DEBUG_TZNAME
701 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
702 #endif
703 for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
704 {
705 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
706 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
707 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
708 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
709 {
710 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
711 }
712 }
713 return NULL;
714 }
715 #endif
716
717 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)718 uprv_tzname(int n)
719 {
720 const char *tzid = NULL;
721 #ifdef U_WINDOWS
722 tzid = uprv_detectWindowsTimeZone();
723
724 if (tzid != NULL) {
725 return tzid;
726 }
727 #else
728
729 /*#if defined(U_DARWIN)
730 int ret;
731
732 tzid = getenv("TZFILE");
733 if (tzid != NULL) {
734 return tzid;
735 }
736 #endif*/
737
738 /* This code can be temporarily disabled to test tzname resolution later on. */
739 #ifndef DEBUG_TZNAME
740 tzid = getenv("TZ");
741 if (tzid != NULL && isValidOlsonID(tzid))
742 {
743 /* This might be a good Olson ID. */
744 if (uprv_strncmp(tzid, "posix/", 6) == 0
745 || uprv_strncmp(tzid, "right/", 6) == 0)
746 {
747 /* Remove the posix/ or right/ prefix. */
748 tzid += 6;
749 }
750 return tzid;
751 }
752 /* else U_TZNAME will give a better result. */
753 #endif
754
755 #if defined(CHECK_LOCALTIME_LINK)
756 /* Caller must handle threading issues */
757 if (gTimeZoneBufferPtr == NULL) {
758 /*
759 This is a trick to look at the name of the link to get the Olson ID
760 because the tzfile contents is underspecified.
761 This isn't guaranteed to work because it may not be a symlink.
762 */
763 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
764 if (0 < ret) {
765 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
766 gTimeZoneBuffer[ret] = 0;
767 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
768 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
769 {
770 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
771 }
772 }
773 }
774 else {
775 return gTimeZoneBufferPtr;
776 }
777 #endif
778 #endif
779
780 #ifdef U_TZNAME
781 #if !defined(U_WINDOWS)
782 /*
783 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
784 So we remap the abbreviation to an olson ID.
785
786 Since Windows exposes a little more timezone information,
787 we normally don't use this code on Windows because
788 uprv_detectWindowsTimeZone should have already given the correct answer.
789 */
790 {
791 struct tm juneSol, decemberSol;
792 int daylightType;
793 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
794 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
795
796 /* This probing will tell us when daylight savings occurs. */
797 localtime_r(&juneSolstice, &juneSol);
798 localtime_r(&decemberSolstice, &decemberSol);
799 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
800 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
801 if (tzid != NULL) {
802 return tzid;
803 }
804 }
805 #endif
806 return U_TZNAME[n];
807 #else
808 return "";
809 #endif
810 }
811
812 /* Get and set the ICU data directory --------------------------------------- */
813
814 static char *gDataDirectory = NULL;
815 #if U_POSIX_LOCALE
816 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
817 #endif
818
putil_cleanup(void)819 static UBool U_CALLCONV putil_cleanup(void)
820 {
821 if (gDataDirectory && *gDataDirectory) {
822 uprv_free(gDataDirectory);
823 }
824 gDataDirectory = NULL;
825 #if U_POSIX_LOCALE
826 if (gCorrectedPOSIXLocale) {
827 uprv_free(gCorrectedPOSIXLocale);
828 gCorrectedPOSIXLocale = NULL;
829 }
830 #endif
831 return TRUE;
832 }
833
834 /*
835 * Set the data directory.
836 * Make a copy of the passed string, and set the global data dir to point to it.
837 * TODO: see bug #2849, regarding thread safety.
838 */
839 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)840 u_setDataDirectory(const char *directory) {
841 char *newDataDir;
842 int32_t length;
843
844 if(directory==NULL || *directory==0) {
845 /* A small optimization to prevent the malloc and copy when the
846 shared library is used, and this is a way to make sure that NULL
847 is never returned.
848 */
849 newDataDir = (char *)"";
850 }
851 else {
852 length=(int32_t)uprv_strlen(directory);
853 newDataDir = (char *)uprv_malloc(length + 2);
854 uprv_strcpy(newDataDir, directory);
855
856 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
857 {
858 char *p;
859 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
860 *p = U_FILE_SEP_CHAR;
861 }
862 }
863 #endif
864 }
865
866 umtx_lock(NULL);
867 if (gDataDirectory && *gDataDirectory) {
868 uprv_free(gDataDirectory);
869 }
870 gDataDirectory = newDataDir;
871 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
872 umtx_unlock(NULL);
873 }
874
875 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)876 uprv_pathIsAbsolute(const char *path)
877 {
878 if(!path || !*path) {
879 return FALSE;
880 }
881
882 if(*path == U_FILE_SEP_CHAR) {
883 return TRUE;
884 }
885
886 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
887 if(*path == U_FILE_ALT_SEP_CHAR) {
888 return TRUE;
889 }
890 #endif
891
892 #if defined(U_WINDOWS)
893 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
894 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
895 path[1] == ':' ) {
896 return TRUE;
897 }
898 #endif
899
900 return FALSE;
901 }
902
903 U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)904 u_getDataDirectory(void) {
905 const char *path = NULL;
906
907 /* if we have the directory, then return it immediately */
908 UMTX_CHECK(NULL, gDataDirectory, path);
909
910 if(path) {
911 return path;
912 }
913
914 /*
915 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
916 override ICU's data with the ICU_DATA environment variable. This prevents
917 problems where multiple custom copies of ICU's specific version of data
918 are installed on a system. Either the application must define the data
919 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
920 ICU, set the data with udata_setCommonData or trust that all of the
921 required data is contained in ICU's data library that contains
922 the entry point defined by U_ICUDATA_ENTRY_POINT.
923
924 There may also be some platforms where environment variables
925 are not allowed.
926 */
927 # if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
928 /* First try to get the environment variable */
929 path=getenv("ICU_DATA");
930 # endif
931
932 /* ICU_DATA_DIR may be set as a compile option */
933 # ifdef ICU_DATA_DIR
934 if(path==NULL || *path==0) {
935 path=ICU_DATA_DIR;
936 }
937 # endif
938
939 if(path==NULL) {
940 /* It looks really bad, set it to something. */
941 path = "";
942 }
943
944 u_setDataDirectory(path);
945 return gDataDirectory;
946 }
947
948
949
950
951
952 /* Macintosh-specific locale information ------------------------------------ */
953 #ifdef XP_MAC
954
/*
 * One row of the mac_lc_recs table, which pairs Macintosh locale codes with
 * a POSIX locale ID.
 * NOTE(review): the meaning of the individual numeric codes is assumed from
 * the field names; confirm against the lookup code that reads the table.
 */
typedef struct {
    int32_t script;      /* presumably the Mac script code */
    int32_t region;      /* presumably the Mac region code */
    int32_t lang;        /* presumably the Mac language code */
    int32_t date_region; /* presumably the Mac region code used for date formats */
    const char* posixID; /* POSIX-style locale name such as "en_US" */
} mac_lc_rec;
962
963 /* Todo: This will be updated with a newer version from www.unicode.org web
964 page when it's available.*/
965 #define MAC_LC_MAGIC_NUMBER -5
966 #define MAC_LC_INIT_NUMBER -9
967
968 static const mac_lc_rec mac_lc_recs[] = {
969 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
970 /* United States*/
971 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
972 /* France*/
973 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
974 /* Great Britain*/
975 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
976 /* Germany*/
977 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
978 /* Italy*/
979 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
    /* Netherlands*/
981 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
    /* French for Belgium or Luxembourg*/
983 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
984 /* Sweden*/
985 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
986 /* Denmark*/
987 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
988 /* Portugal*/
989 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
990 /* French Canada*/
991 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
992 /* Israel*/
993 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
994 /* Japan*/
995 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
996 /* Australia*/
997 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
998 /* the Arabic world (?)*/
999 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
1000 /* Finland*/
1001 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
1002 /* French for Switzerland*/
1003 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
1004 /* German for Switzerland*/
1005 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
1006 /* Greece*/
1007 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
1008 /* Iceland ===*/
1009 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
1010 /* Malta ===*/
1011 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
1012 /* Cyprus ===*/
1013 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
1014 /* Turkey ===*/
1015 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
1016 /* Croatian system for Yugoslavia*/
1017 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
1018 /* Hindi system for India*/
1019 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
1020 /* Pakistan*/
1021 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
1022 /* Lithuania*/
1023 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
1024 /* Poland*/
1025 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
1026 /* Hungary*/
1027 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
1028 /* Estonia*/
1029 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
1030 /* Latvia*/
1031 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
1032 /* Lapland [Ask Rich for the data. HS]*/
1033 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
1034 /* Faeroe Islands*/
1035 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
1036 /* Iran*/
1037 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
1038 /* Russia*/
1039 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
1040 /* Ireland*/
1041 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
1042 /* Korea*/
1043 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
1044 /* People's Republic of China*/
1045 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
1046 /* Taiwan*/
1047 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
1048 /* Thailand*/
1049
1050 /* fallback is en_US*/
1051 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
1052 MAC_LC_MAGIC_NUMBER, "en_US"
1053 };
1054
1055 #endif
1056
1057 #if U_POSIX_LOCALE
/*
 * Return just the POSIX id, whatever happens to be in it.
 *
 * The value is looked up once and then cached in a function-local static, so
 * every later call returns the same pointer.  The result is never NULL; when
 * nothing usable is found it is the literal "en_US_POSIX".
 */
static const char *uprv_getPOSIXID(void)
{
    static const char* posixID = NULL;
    const char *candidate;

    if (posixID != NULL) {
        /* Already determined on an earlier call. */
        return posixID;
    }

    /*
     * On Solaris two different calls to setlocale can result in
     * different values. Only get this value once.
     *
     * We must check this first because an application can set this.
     *
     * LC_ALL can't be used because it's platform dependent. The LANG
     * environment variable seems to affect LC_CTYPE variable by default.
     * Here is what setlocale(LC_ALL, NULL) can return.
     * HPUX can return 'C C C C C C C'
     * Solaris can return /en_US/C/C/C/C/C on the second try.
     * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
     *
     * The default codepage detection also needs to use LC_CTYPE.
     *
     * Do not call setlocale(LC_*, "")! Using an empty string instead
     * of NULL, will modify the libc behavior.
     */
    candidate = setlocale(LC_CTYPE, NULL);

    if (candidate == NULL
        || !uprv_strcmp("C", candidate)
        || !uprv_strcmp("POSIX", candidate))
    {
        /* Maybe we got some garbage. Consult the environment instead,
           in the order LC_ALL, LC_CTYPE, LANG; the first one set wins. */
        candidate = getenv("LC_ALL");
        if (candidate == NULL) {
            candidate = getenv("LC_CTYPE");
        }
        if (candidate == NULL) {
            candidate = getenv("LANG");
        }
    }

    if (candidate == NULL
        || !uprv_strcmp("C", candidate)
        || !uprv_strcmp("POSIX", candidate))
    {
        /* Nothing worked. Give it a nice POSIX default value. */
        candidate = "en_US_POSIX";
    }

    posixID = candidate;
    return posixID;
}
1107 #endif
1108
1109 /* NOTE: The caller should handle thread safety */
1110 U_CAPI const char* U_EXPORT2
uprv_getDefaultLocaleID()1111 uprv_getDefaultLocaleID()
1112 {
1113 #if U_POSIX_LOCALE
1114 /*
1115 Note that: (a '!' means the ID is improper somehow)
1116 LC_ALL ----> default_loc codepage
1117 --------------------------------------------------------
1118 ab.CD ab CD
1119 ab@CD ab__CD -
1120 ab@CD.EF ab__CD EF
1121
1122 ab_CD.EF@GH ab_CD_GH EF
1123
1124 Some 'improper' ways to do the same as above:
1125 ! ab_CD@GH.EF ab_CD_GH EF
1126 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1127 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1128
1129 _CD@GH _CD_GH -
1130 _CD.EF@GH _CD_GH EF
1131
1132 The variant cannot have dots in it.
1133 The 'rightmost' variant (@xxx) wins.
1134 The leftmost codepage (.xxx) wins.
1135 */
1136 char *correctedPOSIXLocale = 0;
1137 const char* posixID = uprv_getPOSIXID();
1138 const char *p;
1139 const char *q;
1140 int32_t len;
1141
1142 /* Format: (no spaces)
1143 ll [ _CC ] [ . MM ] [ @ VV]
1144
1145 l = lang, C = ctry, M = charmap, V = variant
1146 */
1147
1148 if (gCorrectedPOSIXLocale != NULL) {
1149 return gCorrectedPOSIXLocale;
1150 }
1151
1152 if ((p = uprv_strchr(posixID, '.')) != NULL) {
1153 /* assume new locale can't be larger than old one? */
1154 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1155 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1156 correctedPOSIXLocale[p-posixID] = 0;
1157
1158 /* do not copy after the @ */
1159 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1160 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1161 }
1162 }
1163
1164 /* Note that we scan the *uncorrected* ID. */
1165 if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1166 if (correctedPOSIXLocale == NULL) {
1167 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1168 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1169 correctedPOSIXLocale[p-posixID] = 0;
1170 }
1171 p++;
1172
1173 /* Take care of any special cases here.. */
1174 if (!uprv_strcmp(p, "nynorsk")) {
1175 p = "NY";
1176 /* Don't worry about no__NY. In practice, it won't appear. */
1177 }
1178
1179 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1180 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1181 }
1182 else {
1183 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1184 }
1185
1186 if ((q = uprv_strchr(p, '.')) != NULL) {
1187 /* How big will the resulting string be? */
1188 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1189 uprv_strncat(correctedPOSIXLocale, p, q-p);
1190 correctedPOSIXLocale[len] = 0;
1191 }
1192 else {
1193 /* Anything following the @ sign */
1194 uprv_strcat(correctedPOSIXLocale, p);
1195 }
1196
1197 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1198 * How about 'russian' -> 'ru'?
1199 * Many of the other locales using ISO codes will be handled by the
1200 * canonicalization functions in uloc_getDefault.
1201 */
1202 }
1203
1204 /* Was a correction made? */
1205 if (correctedPOSIXLocale != NULL) {
1206 posixID = correctedPOSIXLocale;
1207 }
1208 else {
1209 /* copy it, just in case the original pointer goes away. See j2395 */
1210 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1211 posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1212 }
1213
1214 if (gCorrectedPOSIXLocale == NULL) {
1215 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1216 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1217 correctedPOSIXLocale = NULL;
1218 }
1219
1220 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */
1221 uprv_free(correctedPOSIXLocale);
1222 }
1223
1224 return posixID;
1225
1226 #elif defined(U_WINDOWS)
1227 UErrorCode status = U_ZERO_ERROR;
1228 LCID id = GetThreadLocale();
1229 const char* locID = uprv_convertToPosix(id, &status);
1230
1231 if (U_FAILURE(status)) {
1232 locID = "en_US";
1233 }
1234 return locID;
1235
1236 #elif defined(XP_MAC)
1237 int32_t script = MAC_LC_INIT_NUMBER;
1238 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1239 int32_t region = MAC_LC_INIT_NUMBER;
1240 /* = GetScriptManagerVariable(smRegionCode);*/
1241 int32_t lang = MAC_LC_INIT_NUMBER;
1242 /* = GetScriptManagerVariable(smScriptLang);*/
1243 int32_t date_region = MAC_LC_INIT_NUMBER;
1244 const char* posixID = 0;
1245 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1246 int32_t i;
1247 Intl1Hndl ih;
1248
1249 ih = (Intl1Hndl) GetIntlResource(1);
1250 if (ih)
1251 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1252
1253 for (i = 0; i < count; i++) {
1254 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1255 || (mac_lc_recs[i].script == script))
1256 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1257 || (mac_lc_recs[i].region == region))
1258 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1259 || (mac_lc_recs[i].lang == lang))
1260 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1261 || (mac_lc_recs[i].date_region == date_region))
1262 )
1263 {
1264 posixID = mac_lc_recs[i].posixID;
1265 break;
1266 }
1267 }
1268
1269 return posixID;
1270
1271 #elif defined(OS400)
1272 /* locales are process scoped and are by definition thread safe */
1273 static char correctedLocale[64];
1274 const char *localeID = getenv("LC_ALL");
1275 char *p;
1276
1277 if (localeID == NULL)
1278 localeID = getenv("LANG");
1279 if (localeID == NULL)
1280 localeID = setlocale(LC_ALL, NULL);
1281 /* Make sure we have something... */
1282 if (localeID == NULL)
1283 return "en_US_POSIX";
1284
1285 /* Extract the locale name from the path. */
1286 if((p = uprv_strrchr(localeID, '/')) != NULL)
1287 {
1288 /* Increment p to start of locale name. */
1289 p++;
1290 localeID = p;
1291 }
1292
1293 /* Copy to work location. */
1294 uprv_strcpy(correctedLocale, localeID);
1295
1296 /* Strip off the '.locale' extension. */
1297 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1298 *p = 0;
1299 }
1300
1301 /* Upper case the locale name. */
1302 T_CString_toUpperCase(correctedLocale);
1303
1304 /* See if we are using the POSIX locale. Any of the
1305 * following are equivalent and use the same QLGPGCMA
1306 * (POSIX) locale.
1307 * QLGPGCMA2 means UCS2
1308 * QLGPGCMA_4 means UTF-32
1309 * QLGPGCMA_8 means UTF-8
1310 */
1311 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1312 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1313 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1314 {
1315 uprv_strcpy(correctedLocale, "en_US_POSIX");
1316 }
1317 else
1318 {
1319 int16_t LocaleLen;
1320
1321 /* Lower case the lang portion. */
1322 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1323 {
1324 *p = uprv_tolower(*p);
1325 }
1326
1327 /* Adjust for Euro. After '_E' add 'URO'. */
1328 LocaleLen = uprv_strlen(correctedLocale);
1329 if (correctedLocale[LocaleLen - 2] == '_' &&
1330 correctedLocale[LocaleLen - 1] == 'E')
1331 {
1332 uprv_strcat(correctedLocale, "URO");
1333 }
1334
1335 /* If using Lotus-based locale then convert to
1336 * equivalent non Lotus.
1337 */
1338 else if (correctedLocale[LocaleLen - 2] == '_' &&
1339 correctedLocale[LocaleLen - 1] == 'L')
1340 {
1341 correctedLocale[LocaleLen - 2] = 0;
1342 }
1343
1344 /* There are separate simplified and traditional
1345 * locales called zh_HK_S and zh_HK_T.
1346 */
1347 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1348 {
1349 uprv_strcpy(correctedLocale, "zh_HK");
1350 }
1351
1352 /* A special zh_CN_GBK locale...
1353 */
1354 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1355 {
1356 uprv_strcpy(correctedLocale, "zh_CN");
1357 }
1358
1359 }
1360
1361 return correctedLocale;
1362 #endif
1363
1364 }
1365
1366 #if U_POSIX_LOCALE
/*
Different platforms sometimes report a charset name when they really mean a
different charset. This function rewrites such names so that they are
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
here. Before adding anything to this function, please consider adding unique
names to the ICU alias table in the data directory.

locale  the locale the charset name was found with; NULL and "" both mean
        "no locale".
name    the charset name reported by the platform.

Returns the (possibly replaced) charset name, or NULL when name is NULL or,
after the platform rules have run, empty.
*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    if (name == NULL) {
        return NULL;
    }
    if (locale != NULL && *locale == 0) {
        /* Make sure that an empty locale is handled the same way. */
        locale = NULL;
    }
#if defined(U_AIX)
    if (!uprv_strcmp(name, "IBM-943")) {
        /* Use the ASCII compatible ibm-943 */
        name = "Shift-JIS";
    }
    else if (!uprv_strcmp(name, "IBM-1252")) {
        /* Use the windows-1252 that contains the Euro */
        name = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && !uprv_strcmp(name, "EUC")) {
        /* Solaris underspecifies the "EUC" name. */
        if (!uprv_strcmp(locale, "zh_CN")) {
            name = "EUC-CN";
        }
        else if (!uprv_strcmp(locale, "zh_TW")) {
            name = "EUC-TW";
        }
        else if (!uprv_strcmp(locale, "ko_KR")) {
            name = "EUC-KR";
        }
    }
    else if (!uprv_strcmp(name, "eucJP")) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && *name == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        name = "UTF-8";
    }
#elif defined(U_HPUX)
    if (!uprv_strcmp(name, "eucJP")) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && !uprv_strcmp(name, "euc")) {
        /* Linux underspecifies the "EUC" name. */
        if (!uprv_strcmp(locale, "korean")) {
            name = "EUC-KR";
        }
        else if (!uprv_strcmp(locale, "japanese")) {
            /* See comment below about eucJP */
            name = "eucjis";
        }
    }
    else if (!uprv_strcmp(name, "eucjp")) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        name = "eucjis";
    }
#endif
    /* return NULL when "" is passed in */
    return (*name == 0) ? NULL : name;
}
1456
/*
 * Pulls the charset part out of a POSIX locale ID of the form
 * "locale.charset[@variant]".
 *
 * localeName    the POSIX ID to inspect; may be NULL.
 * buffer        receives the charset text that follows the first '.', cut
 *               off at the first '@'.
 * buffCapacity  size of buffer in bytes; the copy is truncated to fit and is
 *               always NUL-terminated at buffer[buffCapacity-1].
 *
 * Returns NULL when localeName is NULL or contains no '.'; otherwise the
 * result of remapPlatformDependentCodepage() applied to the locale prefix and
 * the extracted charset.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *dot;
    char *variant;
    size_t localeCapacity;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        /* No charset is attached to this ID. */
        return NULL;
    }

    /* Keep the part before the '.' as the locale, truncated to localeBuf. */
    localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */

    /* Everything after the '.' goes to the caller's buffer... */
    uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */

    /* ...minus a trailing "@variant". */
    variant = uprv_strchr(buffer, '@');
    if (variant != NULL) {
        *variant = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, buffer);
}
1477 #endif
1478
/*
 * Determines the name of the platform's default charset.
 *
 * The result is either a string literal or a function-local static buffer
 * that is rewritten on each call; uprv_getDefaultCodepage() calls this once
 * and caches the pointer.  The return value is never NULL; when no platform
 * branch applies, or the POSIX lookups all fail, it is "US-ASCII".
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* %d takes an int; make the argument type match the conversion. */
    sprintf(codepage,"ibm-%d", (int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /*
     * "%.*s" LIMITS the number of characters taken from the codeset name
     * (a plain "%63s" would only set a MINIMUM width, padding short names
     * with blanks and letting the output run past the buffer).  The limit
     * leaves room for the option string; sizeof() of the literal already
     * counts the terminating NUL.
     */
    sprintf(codepage, "%.*s" UCNV_SWAP_LFNL_OPTION_STRING,
            (int)(sizeof(codepage) - sizeof(UCNV_SWAP_LFNL_OPTION_STRING)),
            nl_langinfo(CODESET));
    codepage[63] = 0; /* NULL terminate */
    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    sprintf(codepage, "windows-%d", (int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    uprv_memset(codesetName, 0, sizeof(codesetName));

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    localeName = uprv_getPOSIXID();
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }
    /* else "C" was probably returned. That's underspecified. */

#if U_HAVE_NL_LANGINFO_CODESET
    /* Discard anything the failed attempt above left in the buffer. */
    if (*codesetName) {
        uprv_memset(codesetName, 0, sizeof(codesetName));
    }
    /* When available, check nl_langinfo because it usually gives more
       useful names. It depends on LC_CTYPE and not LANG or LC_ALL.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        codeset = remapPlatformDependentCodepage(NULL, codeset);
        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1563
1564
1565 U_CAPI const char* U_EXPORT2
uprv_getDefaultCodepage()1566 uprv_getDefaultCodepage()
1567 {
1568 static char const *name = NULL;
1569 umtx_lock(NULL);
1570 if (name == NULL) {
1571 name = int_getDefaultCodepage();
1572 }
1573 umtx_unlock(NULL);
1574 return name;
1575 }
1576
1577
1578 /* end of platform-specific implementation -------------- */
1579
1580 /* version handling --------------------------------------------------------- */
1581
1582 U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray,const char * versionString)1583 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1584 char *end;
1585 uint16_t part=0;
1586
1587 if(versionArray==NULL) {
1588 return;
1589 }
1590
1591 if(versionString!=NULL) {
1592 for(;;) {
1593 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1594 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1595 break;
1596 }
1597 versionString=end+1;
1598 }
1599 }
1600
1601 while(part<U_MAX_VERSION_LENGTH) {
1602 versionArray[part++]=0;
1603 }
1604 }
1605
1606 U_CAPI void U_EXPORT2
u_versionToString(UVersionInfo versionArray,char * versionString)1607 u_versionToString(UVersionInfo versionArray, char *versionString) {
1608 uint16_t count, part;
1609 uint8_t field;
1610
1611 if(versionString==NULL) {
1612 return;
1613 }
1614
1615 if(versionArray==NULL) {
1616 versionString[0]=0;
1617 return;
1618 }
1619
1620 /* count how many fields need to be written */
1621 for(count=4; count>0 && versionArray[count-1]==0; --count) {
1622 }
1623
1624 if(count <= 1) {
1625 count = 2;
1626 }
1627
1628 /* write the first part */
1629 /* write the decimal field value */
1630 field=versionArray[0];
1631 if(field>=100) {
1632 *versionString++=(char)('0'+field/100);
1633 field%=100;
1634 }
1635 if(field>=10) {
1636 *versionString++=(char)('0'+field/10);
1637 field%=10;
1638 }
1639 *versionString++=(char)('0'+field);
1640
1641 /* write the following parts */
1642 for(part=1; part<count; ++part) {
1643 /* write a dot first */
1644 *versionString++=U_VERSION_DELIMITER;
1645
1646 /* write the decimal field value */
1647 field=versionArray[part];
1648 if(field>=100) {
1649 *versionString++=(char)('0'+field/100);
1650 field%=100;
1651 }
1652 if(field>=10) {
1653 *versionString++=(char)('0'+field/10);
1654 field%=10;
1655 }
1656 *versionString++=(char)('0'+field);
1657 }
1658
1659 /* NUL-terminate */
1660 *versionString=0;
1661 }
1662
1663 U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray)1664 u_getVersion(UVersionInfo versionArray) {
1665 u_versionFromString(versionArray, U_ICU_VERSION);
1666 }
1667
1668 /*
1669 * Hey, Emacs, please set the following:
1670 *
1671 * Local Variables:
1672 * indent-tabs-mode: nil
1673 * End:
1674 *
1675 */
1676