/*
*******************************************************************************
*
*   Copyright (C) 2004-2009, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  gencase.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2004aug28
*   created by: Markus W. Scherer
*/

#ifndef __GENCASE_H__
#define __GENCASE_H__

#include "unicode/utypes.h"
#include "utrie.h"
#include "propsvec.h"
#include "ucase.h"

U_CDECL_BEGIN

/* Unicode versions --------------------------------------------------------- */

/*
 * Symbolic constants naming Unicode versions.
 * The enumerators are consecutive integers starting at 0, listed in ascending
 * version order, so UNI_VER_COUNT equals the number of version constants.
 *
 * NOTE(review): there is no constant for Unicode 5.0 (UNI_4_1 is followed
 * directly by UNI_5_1) -- confirm that the gap is intentional before adding
 * one, because inserting it would renumber every later constant.
 */
enum {
    UNI_1_0=0,
    UNI_1_1,
    UNI_2_0,
    UNI_3_0,
    UNI_3_1,
    UNI_3_2,
    UNI_4_0,
    UNI_4_0_1,
    UNI_4_1,
    UNI_5_1,
    UNI_5_2,
    UNI_6_0,
    UNI_VER_COUNT   /* count of the constants above; not itself a version */
};

/*
 * Declared here; the definition lives in another translation unit.
 * NOTE(review): presumably holds one of the UNI_* version constants --
 * confirm against the definition and its writers.
 */
extern int32_t ucdVersion;

/* gencase ------------------------------------------------------------------ */

/*
 * Bit-field constants for a 32-bit value: UGENCASE_EXC_MASK selects the
 * upper 16 bits, and UGENCASE_EXC_SHIFT is the bit position at which that
 * field starts, so (value & MASK) >> SHIFT yields the 16-bit field.
 * NOTE(review): "EXC" presumably stands for case-mapping exceptions --
 * confirm against the code that uses these macros.
 */
#define UGENCASE_EXC_SHIFT     16
#define UGENCASE_EXC_MASK      0xffff0000

/*
 * Values for the ucase.icu unfold[] data array, see store.c.
 * The values are stored in ucase.icu so that the runtime code will work with
 * changing values, but they are hardcoded for gencase for simplicity.
 * They are optimized, that is, provide for minimal table column widths,
 * for the actual Unicode data, so that the table size is minimized.
 * Future versions of Unicode may require increases of some of these values.
 */
enum {
    UGENCASE_UNFOLD_STRING_WIDTH=3,
    UGENCASE_UNFOLD_CP_WIDTH=2,
    /* derived: always the sum of the two column widths above */
    UGENCASE_UNFOLD_WIDTH=UGENCASE_UNFOLD_STRING_WIDTH+UGENCASE_UNFOLD_CP_WIDTH,
    UGENCASE_UNFOLD_MAX_ROWS=250
};

/*
 * Values for additional data stored in pv column 1.
 * The value is written out explicitly so that it stays in step with the
 * "bit 0" assignment documented below if further enumerators are added.
 */
enum {
    UGENCASE_IS_MID_LETTER_SHIFT=0      /* bit 0  WB=MidLetter or WB=MidNumLet */
};

72 /* special casing data */
73 typedef struct {
74     UChar32 code;
75     UBool isComplex;
76     UChar lowerCase[32], upperCase[32], titleCase[32];
77 } SpecialCasing;
78 
79 /* case folding data */
80 typedef struct {
81     UChar32 code, simple;
82     char status;
83     UChar full[32];
84 } CaseFolding;
85 
86 /* case mapping properties */
87 typedef struct {
88     UChar32 code, lowerCase, upperCase, titleCase;
89     UChar32 closure[8];
90     SpecialCasing *specialCasing;
91     CaseFolding *caseFolding;
92     uint8_t gc, cc;
93     UBool isCaseIgnorable;
94 } Props;
95 
96 /* global flags */
97 extern UBool beVerbose, haveCopyright;
98 
99 /* properties vectors in gencase.c */
100 extern UPropsVectors *pv;
101 
102 /* prototypes */
103 U_CFUNC void
104 writeUCDFilename(char *basename, const char *filename, const char *suffix);
105 
106 U_CFUNC UBool
107 isToken(const char *token, const char *s);
108 
109 extern void
110 setUnicodeVersion(const char *v);
111 
112 extern void
113 setProps(Props *p);
114 
115 U_CFUNC uint32_t U_EXPORT2
116 getFoldedPropsValue(UNewTrie *trie, UChar32 start, int32_t offset);
117 
118 extern void
119 addCaseSensitive(UChar32 first, UChar32 last);
120 
121 extern void
122 makeCaseClosure(void);
123 
124 extern void
125 makeExceptions(void);
126 
127 extern void
128 generateData(const char *dataDir, UBool csource);
129 
U_CDECL_END

#endif