1 /* 2 ******************************************************************************* 3 * Copyright (C) 2013-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collationsettings.h 7 * 8 * created on: 2013feb07 9 * created by: Markus W. Scherer 10 */ 11 12 #ifndef __COLLATIONSETTINGS_H__ 13 #define __COLLATIONSETTINGS_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_COLLATION 18 19 #include "unicode/ucol.h" 20 #include "collation.h" 21 #include "sharedobject.h" 22 #include "umutex.h" 23 24 U_NAMESPACE_BEGIN 25 26 /** 27 * Collation settings/options/attributes. 28 * These are the values that can be changed via API. 29 */ 30 struct U_I18N_API CollationSettings : public SharedObject { 31 /** 32 * Options bit 0: Perform the FCD check on the input text and deliver normalized text. 33 */ 34 static const int32_t CHECK_FCD = 1; 35 /** 36 * Options bit 1: Numeric collation. 37 * Also known as CODAN = COllate Digits As Numbers. 38 * 39 * Treat digit sequences as numbers with CE sequences in numeric order, 40 * rather than returning a normal CE for each digit. 41 */ 42 static const int32_t NUMERIC = 2; 43 /** 44 * "Shifted" alternate handling, see ALTERNATE_MASK. 45 */ 46 static const int32_t SHIFTED = 4; 47 /** 48 * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable. 49 * Reserve values 8 and 0xc for shift-trimmed and blanked. 50 */ 51 static const int32_t ALTERNATE_MASK = 0xc; 52 /** 53 * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value. 54 */ 55 static const int32_t MAX_VARIABLE_SHIFT = 4; 56 /** maxVariable options bit mask before shifting. */ 57 static const int32_t MAX_VARIABLE_MASK = 0x70; 58 /** Options bit 7: Reserved/unused/0. */ 59 /** 60 * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on. 61 */ 62 static const int32_t UPPER_FIRST = 0x100; 63 /** 64 * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values) 65 * unless case level is on (when they are *moved* into the separate case level). 66 * By default, the case bits are removed from the tertiary weight (ignored). 67 * 68 * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to 69 * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST. 70 */ 71 static const int32_t CASE_FIRST = 0x200; 72 /** 73 * Options bit mask for caseFirst and upperFirst, before shifting. 74 * Same value as caseFirst==upperFirst. 75 */ 76 static const int32_t CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST; 77 /** 78 * Options bit 10: Insert the case level between the secondary and tertiary levels. 79 */ 80 static const int32_t CASE_LEVEL = 0x400; 81 /** 82 * Options bit 11: Compare secondary weights backwards. ("French secondary") 83 */ 84 static const int32_t BACKWARD_SECONDARY = 0x800; 85 /** 86 * Options bits 15..12: The 4-bit strength value bit field is shifted by this value. 87 * It is the top used bit field in the options. (No need to mask after shifting.) 88 */ 89 static const int32_t STRENGTH_SHIFT = 12; 90 /** Strength options bit mask before shifting. */ 91 static const int32_t STRENGTH_MASK = 0xf000; 92 93 /** maxVariable values */ 94 enum MaxVariable { 95 MAX_VAR_SPACE, 96 MAX_VAR_PUNCT, 97 MAX_VAR_SYMBOL, 98 MAX_VAR_CURRENCY 99 }; 100 CollationSettingsCollationSettings101 CollationSettings() 102 : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) | 103 (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)), 104 variableTop(0), 105 reorderTable(NULL), 106 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), 107 fastLatinOptions(-1) {} 108 109 CollationSettings(const CollationSettings &other); 110 virtual ~CollationSettings(); 111 112 UBool operator==(const CollationSettings &other) const; 113 114 inline UBool operator!=(const CollationSettings &other) const { 115 return !operator==(other); 116 } 117 118 int32_t hashCode() const; 119 120 void resetReordering(); 121 void aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table); 122 UBool setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]); 123 124 void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode); 125 getStrengthCollationSettings126 static int32_t getStrength(int32_t options) { 127 return options >> STRENGTH_SHIFT; 128 } 129 getStrengthCollationSettings130 int32_t getStrength() const { 131 return getStrength(options); 132 } 133 134 /** Sets the options bit for an on/off attribute. */ 135 void setFlag(int32_t bit, UColAttributeValue value, 136 int32_t defaultOptions, UErrorCode &errorCode); 137 getFlagCollationSettings138 UColAttributeValue getFlag(int32_t bit) const { 139 return ((options & bit) != 0) ? UCOL_ON : UCOL_OFF; 140 } 141 142 void setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode); 143 getCaseFirstCollationSettings144 UColAttributeValue getCaseFirst() const { 145 int32_t option = options & CASE_FIRST_AND_UPPER_MASK; 146 return (option == 0) ? UCOL_OFF : 147 (option == CASE_FIRST) ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST; 148 } 149 150 void setAlternateHandling(UColAttributeValue value, 151 int32_t defaultOptions, UErrorCode &errorCode); 152 getAlternateHandlingCollationSettings153 UColAttributeValue getAlternateHandling() const { 154 return ((options & ALTERNATE_MASK) == 0) ? UCOL_NON_IGNORABLE : UCOL_SHIFTED; 155 } 156 157 void setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode); 158 getMaxVariableCollationSettings159 MaxVariable getMaxVariable() const { 160 return (MaxVariable)((options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT); 161 } 162 163 /** 164 * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off. 165 */ isTertiaryWithCaseBitsCollationSettings166 static inline UBool isTertiaryWithCaseBits(int32_t options) { 167 return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST; 168 } getTertiaryMaskCollationSettings169 static uint32_t getTertiaryMask(int32_t options) { 170 // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off. 171 return isTertiaryWithCaseBits(options) ? 172 Collation::CASE_AND_TERTIARY_MASK : Collation::ONLY_TERTIARY_MASK; 173 } 174 sortsTertiaryUpperCaseFirstCollationSettings175 static UBool sortsTertiaryUpperCaseFirst(int32_t options) { 176 // On tertiary level, consider case bits and sort uppercase first 177 // if caseLevel is off and caseFirst==upperFirst. 178 return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK; 179 } 180 dontCheckFCDCollationSettings181 inline UBool dontCheckFCD() const { 182 return (options & CHECK_FCD) == 0; 183 } 184 hasBackwardSecondaryCollationSettings185 inline UBool hasBackwardSecondary() const { 186 return (options & BACKWARD_SECONDARY) != 0; 187 } 188 isNumericCollationSettings189 inline UBool isNumeric() const { 190 return (options & NUMERIC) != 0; 191 } 192 193 /** CHECK_FCD etc. */ 194 int32_t options; 195 /** Variable-top primary weight. */ 196 uint32_t variableTop; 197 /** 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering. */ 198 const uint8_t *reorderTable; 199 /** Array of reorder codes; ignored if reorderCodesLength == 0. */ 200 const int32_t *reorderCodes; 201 /** Number of reorder codes; 0 if no reordering. */ 202 int32_t reorderCodesLength; 203 /** 204 * Capacity of reorderCodes. 205 * If 0, then the table and codes are aliases. 206 * Otherwise, this object owns the memory via the reorderCodes pointer; 207 * the table and the codes are in the same memory block, with the codes first. 208 */ 209 int32_t reorderCodesCapacity; 210 211 /** Options for CollationFastLatin. Negative if disabled. */ 212 int32_t fastLatinOptions; 213 uint16_t fastLatinPrimaries[0x180]; 214 }; 215 216 U_NAMESPACE_END 217 218 #endif // !UCONFIG_NO_COLLATION 219 #endif // __COLLATIONSETTINGS_H__ 220