/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsettings.h
*
* created on: 2013feb07
* created by: Markus W. Scherer
*/
11 
12 #ifndef __COLLATIONSETTINGS_H__
13 #define __COLLATIONSETTINGS_H__
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_COLLATION
18 
19 #include "unicode/ucol.h"
20 #include "collation.h"
21 #include "sharedobject.h"
22 #include "umutex.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 /**
27  * Collation settings/options/attributes.
28  * These are the values that can be changed via API.
29  */
30 struct U_I18N_API CollationSettings : public SharedObject {
31     /**
32      * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
33      */
34     static const int32_t CHECK_FCD = 1;
35     /**
36      * Options bit 1: Numeric collation.
37      * Also known as CODAN = COllate Digits As Numbers.
38      *
39      * Treat digit sequences as numbers with CE sequences in numeric order,
40      * rather than returning a normal CE for each digit.
41      */
42     static const int32_t NUMERIC = 2;
43     /**
44      * "Shifted" alternate handling, see ALTERNATE_MASK.
45      */
46     static const int32_t SHIFTED = 4;
47     /**
48      * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
49      * Reserve values 8 and 0xc for shift-trimmed and blanked.
50      */
51     static const int32_t ALTERNATE_MASK = 0xc;
52     /**
53      * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
54      */
55     static const int32_t MAX_VARIABLE_SHIFT = 4;
56     /** maxVariable options bit mask before shifting. */
57     static const int32_t MAX_VARIABLE_MASK = 0x70;
58     /** Options bit 7: Reserved/unused/0. */
59     /**
60      * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
61      */
62     static const int32_t UPPER_FIRST = 0x100;
63     /**
64      * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
65      * unless case level is on (when they are *moved* into the separate case level).
66      * By default, the case bits are removed from the tertiary weight (ignored).
67      *
68      * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
69      * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
70      */
71     static const int32_t CASE_FIRST = 0x200;
72     /**
73      * Options bit mask for caseFirst and upperFirst, before shifting.
74      * Same value as caseFirst==upperFirst.
75      */
76     static const int32_t CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
77     /**
78      * Options bit 10: Insert the case level between the secondary and tertiary levels.
79      */
80     static const int32_t CASE_LEVEL = 0x400;
81     /**
82      * Options bit 11: Compare secondary weights backwards. ("French secondary")
83      */
84     static const int32_t BACKWARD_SECONDARY = 0x800;
85     /**
86      * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
87      * It is the top used bit field in the options. (No need to mask after shifting.)
88      */
89     static const int32_t STRENGTH_SHIFT = 12;
90     /** Strength options bit mask before shifting. */
91     static const int32_t STRENGTH_MASK = 0xf000;
92 
93     /** maxVariable values */
94     enum MaxVariable {
95         MAX_VAR_SPACE,
96         MAX_VAR_PUNCT,
97         MAX_VAR_SYMBOL,
98         MAX_VAR_CURRENCY
99     };
100 
CollationSettingsCollationSettings101     CollationSettings()
102             : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) |
103                       (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)),
104               variableTop(0),
105               reorderTable(NULL),
106               reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
107               fastLatinOptions(-1) {}
108 
109     CollationSettings(const CollationSettings &other);
110     virtual ~CollationSettings();
111 
112     UBool operator==(const CollationSettings &other) const;
113 
114     inline UBool operator!=(const CollationSettings &other) const {
115         return !operator==(other);
116     }
117 
118     int32_t hashCode() const;
119 
120     void resetReordering();
121     void aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table);
122     UBool setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]);
123 
124     void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
125 
getStrengthCollationSettings126     static int32_t getStrength(int32_t options) {
127         return options >> STRENGTH_SHIFT;
128     }
129 
getStrengthCollationSettings130     int32_t getStrength() const {
131         return getStrength(options);
132     }
133 
134     /** Sets the options bit for an on/off attribute. */
135     void setFlag(int32_t bit, UColAttributeValue value,
136                  int32_t defaultOptions, UErrorCode &errorCode);
137 
getFlagCollationSettings138     UColAttributeValue getFlag(int32_t bit) const {
139         return ((options & bit) != 0) ? UCOL_ON : UCOL_OFF;
140     }
141 
142     void setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode);
143 
getCaseFirstCollationSettings144     UColAttributeValue getCaseFirst() const {
145         int32_t option = options & CASE_FIRST_AND_UPPER_MASK;
146         return (option == 0) ? UCOL_OFF :
147                 (option == CASE_FIRST) ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST;
148     }
149 
150     void setAlternateHandling(UColAttributeValue value,
151                               int32_t defaultOptions, UErrorCode &errorCode);
152 
getAlternateHandlingCollationSettings153     UColAttributeValue getAlternateHandling() const {
154         return ((options & ALTERNATE_MASK) == 0) ? UCOL_NON_IGNORABLE : UCOL_SHIFTED;
155     }
156 
157     void setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
158 
getMaxVariableCollationSettings159     MaxVariable getMaxVariable() const {
160         return (MaxVariable)((options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT);
161     }
162 
163     /**
164      * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
165      */
isTertiaryWithCaseBitsCollationSettings166     static inline UBool isTertiaryWithCaseBits(int32_t options) {
167         return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
168     }
getTertiaryMaskCollationSettings169     static uint32_t getTertiaryMask(int32_t options) {
170         // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
171         return isTertiaryWithCaseBits(options) ?
172                 Collation::CASE_AND_TERTIARY_MASK : Collation::ONLY_TERTIARY_MASK;
173     }
174 
sortsTertiaryUpperCaseFirstCollationSettings175     static UBool sortsTertiaryUpperCaseFirst(int32_t options) {
176         // On tertiary level, consider case bits and sort uppercase first
177         // if caseLevel is off and caseFirst==upperFirst.
178         return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
179     }
180 
dontCheckFCDCollationSettings181     inline UBool dontCheckFCD() const {
182         return (options & CHECK_FCD) == 0;
183     }
184 
hasBackwardSecondaryCollationSettings185     inline UBool hasBackwardSecondary() const {
186         return (options & BACKWARD_SECONDARY) != 0;
187     }
188 
isNumericCollationSettings189     inline UBool isNumeric() const {
190         return (options & NUMERIC) != 0;
191     }
192 
193     /** CHECK_FCD etc. */
194     int32_t options;
195     /** Variable-top primary weight. */
196     uint32_t variableTop;
197     /** 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering. */
198     const uint8_t *reorderTable;
199     /** Array of reorder codes; ignored if reorderCodesLength == 0. */
200     const int32_t *reorderCodes;
201     /** Number of reorder codes; 0 if no reordering. */
202     int32_t reorderCodesLength;
203     /**
204      * Capacity of reorderCodes.
205      * If 0, then the table and codes are aliases.
206      * Otherwise, this object owns the memory via the reorderCodes pointer;
207      * the table and the codes are in the same memory block, with the codes first.
208      */
209     int32_t reorderCodesCapacity;
210 
211     /** Options for CollationFastLatin. Negative if disabled. */
212     int32_t fastLatinOptions;
213     uint16_t fastLatinPrimaries[0x180];
214 };
215 
216 U_NAMESPACE_END
217 
218 #endif  // !UCONFIG_NO_COLLATION
219 #endif  // __COLLATIONSETTINGS_H__
220