• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  umachine.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999sep13
16 *   created by: Markus W. Scherer
17 *
18 *   This file defines basic types and constants for ICU to be
19 *   platform-independent. umachine.h and utf.h are included into
20 *   utypes.h to provide all the general definitions for ICU.
21 *   All of these definitions used to be in utypes.h before
22 *   the UTF-handling macros made this unmaintainable.
23 */
24 
25 #ifndef __UMACHINE_H__
26 #define __UMACHINE_H__
27 
28 
29 /**
30  * @addtogroup ICU4C
31  * @{
32  * \file
33  * \brief Basic types and constants for UTF
34  *
35  * <h2> Basic types and constants for UTF </h2>
36  *   This file defines basic types and constants for utf.h to be
37  *   platform-independent. umachine.h and utf.h are included into
38  *   utypes.h to provide all the general definitions for ICU.
39  *   All of these definitions used to be in utypes.h before
40  *   the UTF-handling macros made this unmaintainable.
41  *
42  */
43 /*==========================================================================*/
44 /* Include platform-dependent definitions                                   */
45 /* which are contained in the platform-specific file platform.h             */
46 /*==========================================================================*/
47 
48 #include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
49 
50 /*
51  * ANSI C headers:
52  * stdbool.h defines bool/false/true; stddef.h defines wchar_t
53  */
54 #include <stdbool.h>
55 #include <stddef.h>
56 
57 /*==========================================================================*/
58 /* For C wrappers, we use the symbol U_CAPI.                                */
59 /* This works properly if the includer is C or C++.                         */
60 /* Functions are declared   U_CAPI return-type U_EXPORT2 function-name()... */
61 /*==========================================================================*/
62 
63 /**
64  * \def U_CFUNC
65  * This is used in a declaration of a library private ICU C function. Expands to extern "C" when compiled as C++ and to plain extern when compiled as C.
66  * \xrefitem stable "Stable" "Stable List" ICU 2.4
67  */
68 
69 /**
70  * \def U_CDECL_BEGIN
71  * This is used to begin a declaration of a library private ICU C API. Opens an extern "C" { block when compiled as C++; expands to nothing when compiled as C.
72  * \xrefitem stable "Stable" "Stable List" ICU 2.4
73  */
74 
75 /**
76  * \def U_CDECL_END
77  * This is used to end a declaration of a library private ICU C API. Closes the block opened by U_CDECL_BEGIN when compiled as C++; expands to nothing when compiled as C.
78  * \xrefitem stable "Stable" "Stable List" ICU 2.4
79  */
80 
81 #ifdef __cplusplus
82 #   define U_CFUNC extern "C"
83 #   define U_CDECL_BEGIN extern "C" {
84 #   define U_CDECL_END   }
85 #else
86 #   define U_CFUNC extern
87 #   define U_CDECL_BEGIN
88 #   define U_CDECL_END
89 #endif
90 
91 #ifndef U_ATTRIBUTE_DEPRECATED
92 /**
93  * \def U_ATTRIBUTE_DEPRECATED
94  *  This is used for GCC specific attributes: expands to __attribute__ ((deprecated)) when U_GCC_MAJOR_MINOR >= 302.
95  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
96  */
97 #if U_GCC_MAJOR_MINOR >= 302
98 #    define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
99 /**
100  * \def U_ATTRIBUTE_DEPRECATED
101  * This is used for Visual C++ specific attributes: expands to __declspec(deprecated) when _MSC_VER >= 1400. If neither compiler condition holds, the macro expands to nothing.
102  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
103  */
104 #elif defined(_MSC_VER) && (_MSC_VER >= 1400)
105 #    define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
106 #else
107 #    define U_ATTRIBUTE_DEPRECATED
108 #endif
109 #endif
110 
111 /** This is used to declare a function as a public ICU C API \xrefitem stable "Stable" "Stable List" ICU 2.0*/
112 #define U_CAPI U_CFUNC U_EXPORT
113 /** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
114 #define U_STABLE U_CAPI
115 /** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API  */
116 #define U_DRAFT  U_CAPI
117 /** This is used to declare a function as a deprecated public ICU C API  */
118 #define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
119 /** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API  */
120 #define U_OBSOLETE U_CAPI
121 /** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API  */
122 #define U_INTERNAL U_CAPI
123 
124 /**
125  * \def U_OVERRIDE
126  * Defined to the C++11 "override" keyword, unless U_OVERRIDE has already been defined.
127  * Denotes a class or member which is an override of the base class.
128  * May result in an error if it is applied to something that is not an override.
129  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
130  */
131 #ifndef U_OVERRIDE
132 #define U_OVERRIDE override
133 #endif
134 
135 /**
136  * \def U_FINAL
137  * Defined to the C++11 "final" keyword, unless U_FINAL has already been defined (it is always defined here when U_IN_DOXYGEN is defined).
138  * Denotes a class or member which may not be overridden in subclasses.
139  * May result in an error if subclasses attempt to override.
140  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
141  */
142 #if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
143 #define U_FINAL final
144 #endif
145 
146 // Before ICU 65, function-like, multi-statement ICU macros were just defined as
147 // series of statements wrapped in { } blocks and the caller could choose to
148 // either treat them as if they were actual functions and end the invocation
149 // with a trailing ; creating an empty statement after the block or else omit
150 // this trailing ; using the knowledge that the macro would expand to { }.
151 //
152 // But doing so doesn't work well with macros that look like functions and
153 // compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
154 // switches to the standard solution of wrapping such macros in do { } while.
155 //
156 // This will however break existing code that depends on being able to invoke
157 // these macros without a trailing ; so to be able to remain compatible with
158 // such code the wrapper is itself defined as macros so that it's possible to
159 // build ICU 65 and later with the old macro behaviour, like this:
160 //
161 // export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
162 // runConfigureICU ...
163 //
164 
165 /**
166  * \def UPRV_BLOCK_MACRO_BEGIN
167  * Defined as the "do" keyword by default. Opens the do { } while wrapper used around function-like, multi-statement ICU macros; it may be predefined (for example as empty) to keep the macro behaviour from before ICU 65.
168  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
169  */
170 #ifndef UPRV_BLOCK_MACRO_BEGIN
171 #define UPRV_BLOCK_MACRO_BEGIN do
172 #endif
173 
174 /**
175  * \def UPRV_BLOCK_MACRO_END
176  * Defined as "while (false)" by default. Closes the wrapper opened by UPRV_BLOCK_MACRO_BEGIN, so that the macro invocation must end with a trailing ; unless this macro is predefined otherwise.
177  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
178  */
179 #ifndef UPRV_BLOCK_MACRO_END
180 #define UPRV_BLOCK_MACRO_END while (false)
181 #endif
182 
183 /*==========================================================================*/
184 /* limits for int32_t etc., like in POSIX inttypes.h                        */
185 /*==========================================================================*/
186 
187 #ifndef INT8_MIN
188 /** The smallest value an 8 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
189 #   define INT8_MIN        ((int8_t)(-128))
190 #endif
191 #ifndef INT16_MIN
192 /** The smallest value a 16 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
193 #   define INT16_MIN       ((int16_t)(-32767-1))
194 #endif
195 #ifndef INT32_MIN
196 /** The smallest value a 32 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
197 #   define INT32_MIN       ((int32_t)(-2147483647-1))
198 #endif
199 
200 #ifndef INT8_MAX
201 /** The largest value an 8 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
202 #   define INT8_MAX        ((int8_t)(127))
203 #endif
204 #ifndef INT16_MAX
205 /** The largest value a 16 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
206 #   define INT16_MAX       ((int16_t)(32767))
207 #endif
208 #ifndef INT32_MAX
209 /** The largest value a 32 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
210 #   define INT32_MAX       ((int32_t)(2147483647))
211 #endif
212 
213 #ifndef UINT8_MAX
214 /** The largest value an 8 bit unsigned integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
215 #   define UINT8_MAX       ((uint8_t)(255U))
216 #endif
217 #ifndef UINT16_MAX
218 /** The largest value a 16 bit unsigned integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
219 #   define UINT16_MAX      ((uint16_t)(65535U))
220 #endif
221 #ifndef UINT32_MAX
222 /** The largest value a 32 bit unsigned integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.0 */
223 #   define UINT32_MAX      ((uint32_t)(4294967295U))
224 #endif
225 
226 #if defined(U_INT64_T_UNAVAILABLE)
227 # error int64_t is required for decimal format and rule-based number format.
228 #else
229 # ifndef INT64_C
230 /**
231  * Provides a platform independent way to specify a signed 64-bit integer constant.
232  * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
233  * \xrefitem stable "Stable" "Stable List" ICU 2.8
234  */
235 #   define INT64_C(c) c ## LL
236 # endif
237 # ifndef UINT64_C
238 /**
239  * Provides a platform independent way to specify an unsigned 64-bit integer constant.
240  * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
241  * \xrefitem stable "Stable" "Stable List" ICU 2.8
242  */
243 #   define UINT64_C(c) c ## ULL
244 # endif
245 # ifndef U_INT64_MIN
246 /** The smallest value a 64 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.8 */
247 #     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
248 # endif
249 # ifndef U_INT64_MAX
250 /** The largest value a 64 bit signed integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.8 */
251 #     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
252 # endif
253 # ifndef U_UINT64_MAX
254 /** The largest value a 64 bit unsigned integer can hold \xrefitem stable "Stable" "Stable List" ICU 2.8 */
255 #     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
256 # endif
257 #endif
258 
259 /*==========================================================================*/
260 /* Boolean data type                                                        */
261 /*==========================================================================*/
262 
263 /**
264  * The ICU boolean type, a signed-byte integer.
265  * ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
266  * Also provides a fixed type definition, as opposed to
267  * type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
268  *
269  * \xrefitem stable "Stable" "Stable List" ICU 2.0
270  */
271 typedef int8_t UBool;
272 
273 /**
274  * \def U_DEFINE_FALSE_AND_TRUE
275  * Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
276  * These obsolete macros sometimes break compilation of other code that
277  * defines enum constants or similar with these names.
278  * C++ has long defined bool/false/true.
279  * C99 also added definitions for these, although as macros; see stdbool.h.
280  *
281  * You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
282  *
283  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only. ICU 68
284  */
285 #ifdef U_DEFINE_FALSE_AND_TRUE
286     // Use the predefined value.
287 #elif defined(U_COMBINED_IMPLEMENTATION) || \
288         defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
289         defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
290         defined(U_TOOLUTIL_IMPLEMENTATION)
291     // Inside ICU: Keep FALSE & TRUE available.
292 #   define U_DEFINE_FALSE_AND_TRUE 1
293 #else
294     // Outside ICU: Avoid collision with non-macro definitions of FALSE & TRUE.
295 #   define U_DEFINE_FALSE_AND_TRUE 0
296 #endif
297 
298 #if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
299 #ifndef TRUE
300 /**
301  * The TRUE value of a UBool.
302  *
303  * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 68 Use standard "true" instead.
304  */
305 #   define TRUE  1
306 #endif
307 #ifndef FALSE
308 /**
309  * The FALSE value of a UBool.
310  *
311  * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 68 Use standard "false" instead.
312  */
313 #   define FALSE 0
314 #endif
315 #endif  // U_DEFINE_FALSE_AND_TRUE
316 
317 /*==========================================================================*/
318 /* Unicode data types                                                       */
319 /*==========================================================================*/
320 
321 /* wchar_t-related definitions -------------------------------------------- */
322 
323 /*
324  * \def U_WCHAR_IS_UTF16
325  * Defined if wchar_t uses UTF-16.
326  *
327  * \xrefitem stable "Stable" "Stable List" ICU 2.0
328  */
329 /*
330  * \def U_WCHAR_IS_UTF32
331  * Defined if wchar_t uses UTF-32.
332  *
333  * \xrefitem stable "Stable" "Stable List" ICU 2.0
334  */
335 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
336 #   ifdef __STDC_ISO_10646__
337 #       if (U_SIZEOF_WCHAR_T==2)
338 #           define U_WCHAR_IS_UTF16
339 #       elif (U_SIZEOF_WCHAR_T==4)
340 #           define  U_WCHAR_IS_UTF32
341 #       endif
342 #   elif defined __UCS2__
343 #       if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
344 #           define U_WCHAR_IS_UTF16
345 #       endif
346 #   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
347 #       if (U_SIZEOF_WCHAR_T==4)
348 #           define U_WCHAR_IS_UTF32
349 #       endif
350 #   elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
351 #       define U_WCHAR_IS_UTF32
352 #   elif U_PLATFORM_HAS_WIN32_API
353 #       define U_WCHAR_IS_UTF16
354 #   endif
355 #endif
356 
357 /* UChar and UChar32 definitions -------------------------------------------- */
358 
359 /** Number of bytes in a UChar. \xrefitem stable "Stable" "Stable List" ICU 2.0 */
360 #define U_SIZEOF_UCHAR 2
361 
362 /**
363  * \def U_CHAR16_IS_TYPEDEF
364  * If 1, then char16_t is a typedef and not a real type (yet)
365  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
366  */
367 #if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
368 // for AIX, uchar.h needs to be included
369 # include <uchar.h>
370 # define U_CHAR16_IS_TYPEDEF 1
371 #elif defined(_MSC_VER) && (_MSC_VER < 1900)
372 // Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
373 // and instead use a typedef.  https://msdn.microsoft.com/library/bb531344.aspx
374 # define U_CHAR16_IS_TYPEDEF 1
375 #else
376 # define U_CHAR16_IS_TYPEDEF 0
377 #endif
378 
379 
380 /**
381  * \var UChar
382  *
383  * The base type for UTF-16 code units and pointers.
384  * Unsigned 16-bit integer.
385  * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
386  *
387  * UChar is configurable by defining the macro UCHAR_TYPE
388  * on the preprocessor or compiler command line:
389  * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
390  * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
391  * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
392  *
393  * The default (UCHAR_TYPE not defined) is UChar=char16_t if U_CPLUSPLUS_VERSION >= 11, or else UChar=uint16_t.
394  *
395  * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
396  *
397  * In C, char16_t is a simple typedef of uint_least16_t.
398  * ICU requires uint_least16_t=uint16_t for data memory mapping.
399  * On macOS, char16_t is not available because the uchar.h standard header is missing.
400  *
401  * \xrefitem stable "Stable" "Stable List" ICU 4.4
402  */
403 
404 #if 1
405     // #if 1 is normal. UChar defaults to char16_t in C++.
406     // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
407     // The intltest Makefile #defines UCHAR_TYPE=char16_t,
408     // so we only #define it to uint16_t if it is undefined so far.
409 #elif !defined(UCHAR_TYPE)
410 #   define UCHAR_TYPE uint16_t
411 #endif
412 
413 #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
414         defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
415     // Inside the ICU library code, never configurable.
416     typedef char16_t UChar;
417 #elif defined(UCHAR_TYPE)
418     typedef UCHAR_TYPE UChar;
419 #elif (U_CPLUSPLUS_VERSION >= 11)
420     typedef char16_t UChar;
421 #else
422     typedef uint16_t UChar;
423 #endif
424 
425 /**
426  * \var OldUChar
427  * Default ICU 58 definition of UChar.
428  * A base type for UTF-16 code units and pointers.
429  * Unsigned 16-bit integer.
430  *
431  * Define OldUChar to be wchar_t if that is 16 bits wide.
432  * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if that macro is defined, or else uint16_t.
433  *
434  * This makes the definition of OldUChar platform-dependent
435  * but allows direct string type compatibility with platforms with
436  * 16-bit wchar_t types.
437  *
438  * This is how UChar was defined in ICU 58, for transition convenience.
439  * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
440  * The current UChar responds to UCHAR_TYPE but OldUChar does not.
441  *
442  * \xrefitem stable "Stable" "Stable List" ICU 59
443  */
444 #if U_SIZEOF_WCHAR_T==2
445     typedef wchar_t OldUChar;
446 #elif defined(__CHAR16_TYPE__)
447     typedef __CHAR16_TYPE__ OldUChar;
448 #else
449     typedef uint16_t OldUChar;
450 #endif
451 
452 /**
453  * Define UChar32 as a type for single Unicode code points.
454  * UChar32 is a signed 32-bit integer (same as int32_t).
455  *
456  * The Unicode code point range is 0..0x10ffff.
457  * All other values (negative or >=0x110000) are illegal as Unicode code points.
458  * They may be used as sentinel values to indicate "done", "error"
459  * or similar non-code point conditions.
460  *
461  * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
462  * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
463  * or else to be uint32_t.
464  * That is, the definition of UChar32 was platform-dependent.
465  *
466  * @see U_SENTINEL
467  * \xrefitem stable "Stable" "Stable List" ICU 2.4
468  */
469 typedef int32_t UChar32;
470 
471 /**
472  * This value is intended for sentinel values for APIs that
473  * (take or) return single code points (UChar32).
474  * It is outside of the Unicode code point range 0..0x10ffff.
475  *
476  * For example, a "done" or "error" value in a new API
477  * could be indicated with U_SENTINEL.
478  *
479  * ICU APIs designed before ICU 2.4 usually define service-specific "done"
480  * values, mostly 0xffff.
481  * Those may need to be distinguished from
482  * actual U+ffff text contents by calling functions like
483  * CharacterIterator::hasNext() or UnicodeString::length().
484  *
485  * @return -1
486  * @see UChar32
487  * \xrefitem stable "Stable" "Stable List" ICU 2.4
488  */
489 #define U_SENTINEL (-1)
490 
491 #include "unicode/urename.h"
492 
493 #endif
494 
495 /** @} */ // addtogroup
496