1 /* 2 ******************************************************************************* 3 * Copyright (C) 2013-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collationsets.h 7 * 8 * created on: 2013feb09 9 * created by: Markus W. Scherer 10 */ 11 12 #ifndef __COLLATIONSETS_H__ 13 #define __COLLATIONSETS_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_COLLATION 18 19 #include "unicode/uniset.h" 20 #include "collation.h" 21 22 U_NAMESPACE_BEGIN 23 24 struct CollationData; 25 26 /** 27 * Finds the set of characters and strings that sort differently in the tailoring 28 * from the base data. 29 * 30 * Every mapping in the tailoring needs to be compared to the base, 31 * because some mappings are copied for optimization, and 32 * all contractions for a character are copied if any contractions for that character 33 * are added, modified or removed. 34 * 35 * It might be simpler to re-parse the rule string, but: 36 * - That would require duplicating some of the from-rules builder code. 37 * - That would make the runtime code depend on the builder. 38 * - That would only work if we have the rule string, and we allow users to 39 * omit the rule string from data files. 40 */ 41 class TailoredSet : public UMemory { 42 public: TailoredSet(UnicodeSet * t)43 TailoredSet(UnicodeSet *t) 44 : data(NULL), baseData(NULL), 45 tailored(t), 46 suffix(NULL), 47 errorCode(U_ZERO_ERROR) {} 48 49 void forData(const CollationData *d, UErrorCode &errorCode); 50 51 /** 52 * @return U_SUCCESS(errorCode) in C++, void in Java 53 * @internal only public for access by callback 54 */ 55 UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 56 57 private: 58 void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); 59 void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); 60 void compareContractions(UChar32 c, const UChar *p, const UChar *q); 61 62 void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); 63 void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); 64 void addContractions(UChar32 c, const UChar *p); 65 void addSuffix(UChar32 c, const UnicodeString &sfx); 66 void add(UChar32 c); 67 68 /** Prefixes are reversed in the data structure. */ setPrefix(const UnicodeString & pfx)69 void setPrefix(const UnicodeString &pfx) { 70 unreversedPrefix = pfx; 71 unreversedPrefix.reverse(); 72 } resetPrefix()73 void resetPrefix() { 74 unreversedPrefix.remove(); 75 } 76 77 const CollationData *data; 78 const CollationData *baseData; 79 UnicodeSet *tailored; 80 UnicodeString unreversedPrefix; 81 const UnicodeString *suffix; 82 UErrorCode errorCode; 83 }; 84 85 class ContractionsAndExpansions : public UMemory { 86 public: 87 class CESink : public UMemory { 88 public: 89 virtual ~CESink(); 90 virtual void handleCE(int64_t ce) = 0; 91 virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; 92 }; 93 ContractionsAndExpansions(UnicodeSet * con,UnicodeSet * exp,CESink * s,UBool prefixes)94 ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) 95 : data(NULL), 96 contractions(con), expansions(exp), 97 sink(s), 98 addPrefixes(prefixes), 99 checkTailored(0), 100 suffix(NULL), 101 errorCode(U_ZERO_ERROR) {} 102 103 void forData(const CollationData *d, UErrorCode &errorCode); 104 void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); 105 106 // all following: @internal, only public for access by callback 107 108 void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 109 110 void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); 111 void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); 112 113 void addExpansions(UChar32 start, UChar32 end); 114 void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); 115 116 /** Prefixes are reversed in the data structure. */ setPrefix(const UnicodeString & pfx)117 void setPrefix(const UnicodeString &pfx) { 118 unreversedPrefix = pfx; 119 unreversedPrefix.reverse(); 120 } resetPrefix()121 void resetPrefix() { 122 unreversedPrefix.remove(); 123 } 124 125 const CollationData *data; 126 UnicodeSet *contractions; 127 UnicodeSet *expansions; 128 CESink *sink; 129 UBool addPrefixes; 130 int8_t checkTailored; // -1: collected tailored +1: exclude tailored 131 UnicodeSet tailored; 132 UnicodeSet ranges; 133 UnicodeString unreversedPrefix; 134 const UnicodeString *suffix; 135 int64_t ces[Collation::MAX_EXPANSION_LENGTH]; 136 UErrorCode errorCode; 137 }; 138 139 U_NAMESPACE_END 140 141 #endif // !UCONFIG_NO_COLLATION 142 #endif // __COLLATIONSETS_H__ 143