1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2001-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: ucol_tok.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created 02/22/2001 14 * created by: Vladimir Weinstein 15 * 16 * This module reads a tailoring rule string and produces a list of 17 * tokens that will be turned into collation elements 18 * 19 */ 20 21 #ifndef UCOL_TOKENS_H 22 #define UCOL_TOKENS_H 23 24 #include "unicode/utypes.h" 25 #include "unicode/uset.h" 26 27 #if !UCONFIG_NO_COLLATION 28 29 #include "ucol_imp.h" 30 #include "uhash.h" 31 #include "unicode/parseerr.h" 32 33 #define UCOL_TOK_UNSET 0xFFFFFFFF 34 #define UCOL_TOK_RESET 0xDEADBEEF 35 36 #define UCOL_TOK_POLARITY_NEGATIVE 0 37 #define UCOL_TOK_POLARITY_POSITIVE 1 38 39 #define UCOL_TOK_TOP 0x04 40 #define UCOL_TOK_VARIABLE_TOP 0x08 41 #define UCOL_TOK_BEFORE 0x03 42 #define UCOL_TOK_SUCCESS 0x10 43 44 /* this is space for the extra strings that need to be unquoted */ 45 /* during the parsing of the rules */ 46 #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 47 typedef struct UColToken UColToken; 48 49 typedef struct { 50 UColToken* first; 51 UColToken* last; 52 UColToken* reset; 53 UBool indirect; 54 uint32_t baseCE; 55 uint32_t baseContCE; 56 uint32_t nextCE; 57 uint32_t nextContCE; 58 uint32_t previousCE; 59 uint32_t previousContCE; 60 int32_t pos[UCOL_STRENGTH_LIMIT]; 61 uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; 62 uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; 63 uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; 64 UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; 65 UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; 66 } UColTokListHeader; 67 68 struct UColToken { 69 UChar debugSource; 70 UChar debugExpansion; 71 UChar debugPrefix; 72 uint32_t CEs[128]; 73 uint32_t noOfCEs; 74 uint32_t expCEs[128]; 75 uint32_t noOfExpCEs; 76 uint32_t source; 77 uint32_t expansion; 78 uint32_t prefix; 79 uint32_t strength; 80 uint32_t toInsert; 81 uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */ 82 UColTokListHeader *listHeader; 83 UColToken* previous; 84 UColToken* next; 85 UChar **rulesToParseHdl; 86 uint16_t flags; 87 }; 88 89 /* 90 * This is a token that has been parsed 91 * but not yet processed. Used to reduce 92 * the number of arguments in the parser 93 */ 94 typedef struct { 95 uint32_t strength; 96 uint32_t charsOffset; 97 uint32_t charsLen; 98 uint32_t extensionOffset; 99 uint32_t extensionLen; 100 uint32_t prefixOffset; 101 uint32_t prefixLen; 102 uint16_t flags; 103 uint16_t indirectIndex; 104 } UColParsedToken; 105 106 107 typedef struct { 108 UColParsedToken parsedToken; 109 UChar *source; 110 UChar *end; 111 const UChar *current; 112 UChar *sourceCurrent; 113 UChar *extraCurrent; 114 UChar *extraEnd; 115 const InverseUCATableHeader *invUCA; 116 const UCollator *UCA; 117 UHashtable *tailored; 118 UColOptionSet *opts; 119 uint32_t resultLen; 120 uint32_t listCapacity; 121 UColTokListHeader *lh; 122 UColToken *varTop; 123 USet *copySet; 124 USet *removeSet; 125 UBool buildCCTabFlag; /* Tailoring rule requirs building combining class table. */ 126 127 UChar32 previousCp; /* Previous code point. */ 128 /* For processing starred lists. */ 129 UBool isStarred; /* Are we processing a starred token? */ 130 UBool savedIsStarred; 131 uint32_t currentStarredCharIndex; /* Index of the current charrecter in the starred expression. */ 132 uint32_t lastStarredCharIndex; /* Index to the last character in the starred expression. */ 133 134 /* For processing ranges. */ 135 UBool inRange; /* Are we in a range? */ 136 UChar32 currentRangeCp; /* Current code point in the range. */ 137 UChar32 lastRangeCp; /* The last code point in the range. */ 138 139 /* reorder codes for collation reordering */ 140 int32_t* reorderCodes; 141 int32_t reorderCodesLength; 142 143 } UColTokenParser; 144 145 typedef struct { 146 const UChar *subName; 147 int32_t subLen; 148 UColAttributeValue attrVal; 149 } ucolTokSuboption; 150 151 typedef struct { 152 const UChar *optionName; 153 int32_t optionLen; 154 const ucolTokSuboption *subopts; 155 int32_t subSize; 156 UColAttribute attr; 157 } ucolTokOption; 158 159 #define ucol_tok_isSpecialChar(ch) \ 160 (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ 161 (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ 162 (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ 163 (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ 164 (ch) == 0x007B)) 165 166 167 U_CFUNC 168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, 169 UParseError *parseError, 170 UErrorCode *status); 171 172 U_CFUNC 173 void ucol_tok_initTokenList(UColTokenParser *src, 174 const UChar *rules, 175 const uint32_t rulesLength, 176 const UCollator *UCA, 177 GetCollationRulesFunction importFunc, 178 void* context, 179 UErrorCode *status); 180 181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); 182 183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, 184 UBool startOfRules, 185 UParseError *parseError, 186 UErrorCode *status); 187 188 189 U_CAPI const UChar * U_EXPORT2 190 ucol_tok_getNextArgument(const UChar *start, const UChar *end, 191 UColAttribute *attrib, UColAttributeValue *value, 192 UErrorCode *status); 193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, 194 uint32_t CE, uint32_t contCE, 195 uint32_t *nextCE, uint32_t *nextContCE, 196 uint32_t strength); 197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, 198 uint32_t CE, uint32_t contCE, 199 uint32_t *prevCE, uint32_t *prevContCE, 200 uint32_t strength); 201 202 const UChar* U_CALLCONV ucol_tok_getRulesFromBundle( 203 void* context, 204 const char* locale, 205 const char* type, 206 int32_t* pLength, 207 UErrorCode* status); 208 209 #endif /* #if !UCONFIG_NO_COLLATION */ 210 211 #endif 212