• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2001-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  ucol_tok.h
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created 02/22/2001
14 *   created by: Vladimir Weinstein
15 *
16 * This module reads a tailoring rule string and produces a list of
17 * tokens that will be turned into collation elements
18 *
19 */
20 
21 #ifndef UCOL_TOKENS_H
22 #define UCOL_TOKENS_H
23 
24 #include "unicode/utypes.h"
25 #include "unicode/uset.h"
26 
27 #if !UCONFIG_NO_COLLATION
28 
29 #include "ucol_imp.h"
30 #include "uhash.h"
31 #include "unicode/parseerr.h"
32 
/* Sentinel constants. NOTE(review): where they are stored or compared is not
 * visible in this header -- confirm against the parser implementation. */
33 #define UCOL_TOK_UNSET 0xFFFFFFFF
34 #define UCOL_TOK_RESET 0xDEADBEEF
35 
/* Polarity codes. NOTE(review): presumably the values kept in
 * UColToken.polarity -- confirm against the parser implementation. */
36 #define UCOL_TOK_POLARITY_NEGATIVE 0
37 #define UCOL_TOK_POLARITY_POSITIVE 1
38 
/* NOTE(review): these look like values for the uint16_t `flags` members of
 * UColToken / UColParsedToken. UCOL_TOK_BEFORE (0x03) covers two bits, so it
 * may be a mask rather than a single flag -- confirm before combining them. */
39 #define UCOL_TOK_TOP 0x04
40 #define UCOL_TOK_VARIABLE_TOP 0x08
41 #define UCOL_TOK_BEFORE 0x03
42 #define UCOL_TOK_SUCCESS 0x10
43 
44 /* This is the space reserved for the extra strings that need to be unquoted */
45 /* during the parsing of the rules. NOTE(review): unit (UChars vs. bytes) is not stated here -- confirm. */
46 #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
/* Forward declaration: UColTokListHeader holds UColToken pointers before
 * struct UColToken itself is defined further down in this header. */
47 typedef struct UColToken UColToken;
48 
/*
 * Header record for one list of UColToken objects. It keeps pointers to the
 * first, last and reset tokens of the list, a set of 32-bit CE values
 * (base / next / previous, each paired with a "Cont" value), and several
 * per-strength arrays.
 * NOTE(review): the "Cont" members presumably hold continuation CEs, and the
 * per-strength arrays are presumably indexed by collation strength -- confirm
 * against the builder code that fills them.
 */
49 typedef struct  {
50   UColToken* first;
51   UColToken* last;
52   UColToken* reset;
53   UBool indirect;
54   uint32_t baseCE;
55   uint32_t baseContCE;
56   uint32_t nextCE;
57   uint32_t nextContCE;
58   uint32_t previousCE;
59   uint32_t previousContCE;
60   int32_t pos[UCOL_STRENGTH_LIMIT]; /* sized by UCOL_STRENGTH_LIMIT; the arrays below use UCOL_CE_STRENGTH_LIMIT */
61   uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; /* three entries per CE strength */
62   uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; /* three entries per CE strength */
63   uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
64   UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];
65   UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];
66 } UColTokListHeader;
67 
/*
 * One token produced from the tailoring rules. Each token points back to its
 * owning UColTokListHeader and to neighbouring tokens through `previous` and
 * `next`, and carries two fixed-size arrays of 32-bit CE values with their
 * counts.
 * NOTE(review): the encoding of the 32-bit `source`, `expansion` and `prefix`
 * members is not visible in this header -- confirm against the parser.
 */
68 struct UColToken {
69   UChar debugSource; /* NOTE(review): single code units, presumably for debugger inspection only -- confirm */
70   UChar debugExpansion;
71   UChar debugPrefix;
72   uint32_t CEs[128];
73   uint32_t noOfCEs; /* presumably the number of valid entries in CEs -- confirm */
74   uint32_t expCEs[128];
75   uint32_t noOfExpCEs; /* presumably the number of valid entries in expCEs -- confirm */
76   uint32_t source;
77   uint32_t expansion;
78   uint32_t prefix;
79   uint32_t strength;
80   uint32_t toInsert;
81   uint32_t polarity; /* positive for <, <<, <<<, , ; and negative for >, >>, >>>. NOTE(review): this comment used to say 1 / -1, but the field is unsigned and UCOL_TOK_POLARITY_NEGATIVE is 0 -- presumably the UCOL_TOK_POLARITY_* values are stored; confirm */
82   UColTokListHeader *listHeader;
83   UColToken* previous;
84   UColToken* next;
85   UChar **rulesToParseHdl; /* pointer to a UChar pointer; presumably a handle to the rule text -- confirm */
86   uint16_t flags;
87 };
88 
89 /*
90  * This is a token that has been parsed
91  * but not yet processed. Used to reduce
92  * the number of arguments in the parser.
 * An instance is embedded in UColTokenParser as `parsedToken`.
 * The chars / extension / prefix parts are each described by an
 * offset and a length.
 * NOTE(review): the buffer those offsets refer to is not stated in this
 * header -- presumably the parser's rule text; confirm.
93  */
94 typedef struct {
95   uint32_t strength;
96   uint32_t charsOffset;
97   uint32_t charsLen;
98   uint32_t extensionOffset;
99   uint32_t extensionLen;
100   uint32_t prefixOffset;
101   uint32_t prefixLen;
102   uint16_t flags;
103   uint16_t indirectIndex;
104 } UColParsedToken;
105 
106 
/*
 * State of the rule parser. A pointer to this struct is the `src` argument
 * of the ucol_tok_* and ucol_inv_* functions declared in this header.
 * It embeds the most recently parsed token, pointers into the rule text and
 * into an extra buffer, references to UCA data, the array of token list
 * headers, and state for starred lists, ranges and reordering codes.
 * NOTE(review): ownership of the pointed-to buffers is not visible here;
 * see ucol_tok_initTokenList / ucol_tok_closeTokenList -- confirm.
 */
107 typedef struct {
108   UColParsedToken parsedToken;
109   UChar *source;
110   UChar *end;
111   const UChar *current;
112   UChar *sourceCurrent;
113   UChar *extraCurrent;
114   UChar *extraEnd;
115   const InverseUCATableHeader *invUCA;
116   const UCollator *UCA;
117   UHashtable *tailored;
118   UColOptionSet *opts;
119   uint32_t resultLen;
120   uint32_t listCapacity;
121   UColTokListHeader *lh;
122   UColToken *varTop;
123   USet *copySet;
124   USet *removeSet;
125   UBool buildCCTabFlag;  /* Tailoring rule requires building combining class table. */
126 
127   UChar32 previousCp;               /* Previous code point. */
128   /* For processing starred lists. */
129   UBool isStarred;                   /* Are we processing a starred token? */
130   UBool savedIsStarred;
131   uint32_t currentStarredCharIndex;  /* Index of the current character in the starred expression. */
132   uint32_t lastStarredCharIndex;    /* Index to the last character in the starred expression. */
133 
134   /* For processing ranges. */
135   UBool inRange;                     /* Are we in a range? */
136   UChar32 currentRangeCp;           /* Current code point in the range. */
137   UChar32 lastRangeCp;              /* The last code point in the range. */
138 
139   /* reorder codes for collation reordering */
140   int32_t* reorderCodes;
141   int32_t reorderCodesLength;
142 
143 } UColTokenParser;
144 
/*
 * Associates a sub-option name, given as a UChar pointer plus a length,
 * with a UColAttributeValue.
 * NOTE(review): subLen is presumably counted in UChars -- confirm.
 */
145 typedef struct {
146   const UChar *subName;
147   int32_t subLen;
148   UColAttributeValue attrVal;
149 } ucolTokSuboption;
150 
/*
 * Associates an option name, given as a UChar pointer plus a length, with a
 * UColAttribute and a pointer to ucolTokSuboption entries.
 * NOTE(review): subSize is presumably the number of entries behind
 * `subopts`, and optionLen is presumably counted in UChars -- confirm.
 */
151 typedef struct {
152    const UChar *optionName;
153    int32_t optionLen;
154    const ucolTokSuboption *subopts;
155    int32_t subSize;
156    UColAttribute attr;
157 } ucolTokOption;
158 
/*
 * Evaluates to non-zero when ch is in one of the ranges 0x0020-0x002F,
 * 0x003A-0x003F, 0x005B-0x0060, 0x007D-0x007E, or is equal to 0x007B.
 * 0x007C is not matched.
 * The argument is expanded several times, so do not pass an expression
 * with side effects (for example *p++).
 */
159 #define ucol_tok_isSpecialChar(ch)              \
160     (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
161       (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
162       (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
163       (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
164       (ch) == 0x007B))
165 
166 
/*
 * Declaration only; the implementation is not part of this header.
 * src        - parser state
 * parseError - receives parse error details
 * status     - ICU error code, in/out
 * Returns a uint32_t. NOTE(review): presumably the number of token lists
 * assembled (compare UColTokenParser.resultLen) -- confirm.
 */
167 U_CFUNC
168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
169                                     UParseError *parseError,
170                                     UErrorCode *status);
171 
/*
 * Declaration only; the implementation is not part of this header.
 * src         - parser state to set up
 * rules       - rule text
 * rulesLength - length of `rules` (presumably in UChars -- confirm)
 * UCA         - collator passed in as const
 * importFunc  - callback of type GetCollationRulesFunction
 * context     - opaque pointer (presumably handed to importFunc -- confirm)
 * status      - ICU error code, in/out
 */
172 U_CFUNC
173 void ucol_tok_initTokenList(UColTokenParser *src,
174                             const UChar *rules,
175                             const uint32_t rulesLength,
176                             const UCollator *UCA,
177                             GetCollationRulesFunction importFunc,
178                             void* context,
179                             UErrorCode *status);
180 
/* Declaration only. NOTE(review): presumably releases what ucol_tok_initTokenList set up in src -- confirm. */
181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);
182 
/*
 * Declaration only; the implementation is not part of this header.
 * src          - parser state
 * startOfRules - boolean flag supplied by the caller
 * parseError   - receives parse error details
 * status       - ICU error code, in/out
 * Returns a const UChar pointer. NOTE(review): what it points at, and the
 * end-of-input value, are not visible here -- confirm.
 */
183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src,
184                         UBool startOfRules,
185                         UParseError *parseError,
186                         UErrorCode *status);
187 
188 
/*
 * Declaration only; the implementation is not part of this header.
 * start, end - bounds of the text to examine
 * attrib     - out: attribute found
 * value      - out: value for that attribute
 * status     - ICU error code, in/out
 * Returns a const UChar pointer. NOTE(review): presumably the position from
 * which to continue scanning -- confirm.
 */
189 U_CAPI const UChar * U_EXPORT2
190 ucol_tok_getNextArgument(const UChar *start, const UChar *end,
191                                UColAttribute *attrib, UColAttributeValue *value,
192                                UErrorCode *status);
/*
 * Declaration only; the implementation is not part of this header.
 * src                - parser state (const)
 * CE, contCE         - input CE pair
 * nextCE, nextContCE - out: resulting CE pair
 * strength           - strength for the lookup
 * Returns an int32_t. NOTE(review): presumably a position in the inverse
 * UCA table (src->invUCA) -- confirm.
 */
193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
194                                             uint32_t CE, uint32_t contCE,
195                                             uint32_t *nextCE, uint32_t *nextContCE,
196                                             uint32_t strength);
/*
 * Declaration only; the implementation is not part of this header.
 * src                - parser state (const)
 * CE, contCE         - input CE pair
 * prevCE, prevContCE - out: resulting CE pair
 * strength           - strength for the lookup
 * Returns an int32_t. NOTE(review): presumably a position in the inverse
 * UCA table (src->invUCA) -- confirm.
 * NOTE(review): declared with U_CFUNC, whereas ucol_inv_getNextCE uses
 * U_CAPI; confirm the difference in export visibility is intended.
 */
197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
198                                             uint32_t CE, uint32_t contCE,
199                                             uint32_t *prevCE, uint32_t *prevContCE,
200                                             uint32_t strength);
201 
/*
 * Declaration only; the implementation is not part of this header.
 * context - opaque caller data
 * locale  - locale identifier string
 * type    - type string
 * pLength - out: length of the returned text
 * status  - ICU error code, in/out
 * Returns a const UChar pointer.
 * NOTE(review): the signature presumably matches GetCollationRulesFunction,
 * the callback type taken by ucol_tok_initTokenList -- confirm.
 * NOTE(review): unlike the other prototypes here, this one carries no
 * U_CFUNC / U_CAPI linkage macro -- confirm that is intended.
 */
202 const UChar* U_CALLCONV ucol_tok_getRulesFromBundle(
203     void* context,
204     const char* locale,
205     const char* type,
206     int32_t* pLength,
207     UErrorCode* status);
208 
209 #endif /* #if !UCONFIG_NO_COLLATION */
210 
211 #endif
212