1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2013-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * collationsets.h 9 * 10 * created on: 2013feb09 11 * created by: Markus W. Scherer 12 */ 13 14 #ifndef __COLLATIONSETS_H__ 15 #define __COLLATIONSETS_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_COLLATION 20 21 #include "unicode/uniset.h" 22 #include "collation.h" 23 24 U_NAMESPACE_BEGIN 25 26 struct CollationData; 27 28 /** 29 * Finds the set of characters and strings that sort differently in the tailoring 30 * from the base data. 31 * 32 * Every mapping in the tailoring needs to be compared to the base, 33 * because some mappings are copied for optimization, and 34 * all contractions for a character are copied if any contractions for that character 35 * are added, modified or removed. 36 * 37 * It might be simpler to re-parse the rule string, but: 38 * - That would require duplicating some of the from-rules builder code. 39 * - That would make the runtime code depend on the builder. 40 * - That would only work if we have the rule string, and we allow users to 41 * omit the rule string from data files. 42 */ 43 class TailoredSet : public UMemory { 44 public: TailoredSet(UnicodeSet * t)45 TailoredSet(UnicodeSet *t) 46 : data(NULL), baseData(NULL), 47 tailored(t), 48 suffix(NULL), 49 errorCode(U_ZERO_ERROR) {} 50 51 void forData(const CollationData *d, UErrorCode &errorCode); 52 53 /** 54 * @return U_SUCCESS(errorCode) in C++, void in Java 55 * @internal only public for access by callback 56 */ 57 UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 58 59 private: 60 void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); 61 void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); 62 void compareContractions(UChar32 c, const UChar *p, const UChar *q); 63 64 void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); 65 void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); 66 void addContractions(UChar32 c, const UChar *p); 67 void addSuffix(UChar32 c, const UnicodeString &sfx); 68 void add(UChar32 c); 69 70 /** Prefixes are reversed in the data structure. */ setPrefix(const UnicodeString & pfx)71 void setPrefix(const UnicodeString &pfx) { 72 unreversedPrefix = pfx; 73 unreversedPrefix.reverse(); 74 } resetPrefix()75 void resetPrefix() { 76 unreversedPrefix.remove(); 77 } 78 79 const CollationData *data; 80 const CollationData *baseData; 81 UnicodeSet *tailored; 82 UnicodeString unreversedPrefix; 83 const UnicodeString *suffix; 84 UErrorCode errorCode; 85 }; 86 87 class ContractionsAndExpansions : public UMemory { 88 public: 89 class CESink : public UMemory { 90 public: 91 virtual ~CESink(); 92 virtual void handleCE(int64_t ce) = 0; 93 virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; 94 }; 95 ContractionsAndExpansions(UnicodeSet * con,UnicodeSet * exp,CESink * s,UBool prefixes)96 ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) 97 : data(NULL), 98 contractions(con), expansions(exp), 99 sink(s), 100 addPrefixes(prefixes), 101 checkTailored(0), 102 suffix(NULL), 103 errorCode(U_ZERO_ERROR) {} 104 105 void forData(const CollationData *d, UErrorCode &errorCode); 106 void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); 107 108 // all following: @internal, only public for access by callback 109 110 void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); 111 112 void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); 113 void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); 114 115 void addExpansions(UChar32 start, UChar32 end); 116 void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); 117 118 /** Prefixes are reversed in the data structure. */ setPrefix(const UnicodeString & pfx)119 void setPrefix(const UnicodeString &pfx) { 120 unreversedPrefix = pfx; 121 unreversedPrefix.reverse(); 122 } resetPrefix()123 void resetPrefix() { 124 unreversedPrefix.remove(); 125 } 126 127 const CollationData *data; 128 UnicodeSet *contractions; 129 UnicodeSet *expansions; 130 CESink *sink; 131 UBool addPrefixes; 132 int8_t checkTailored; // -1: collected tailored +1: exclude tailored 133 UnicodeSet tailored; 134 UnicodeSet ranges; 135 UnicodeString unreversedPrefix; 136 const UnicodeString *suffix; 137 int64_t ces[Collation::MAX_EXPANSION_LENGTH]; 138 UErrorCode errorCode; 139 }; 140 141 U_NAMESPACE_END 142 143 #endif // !UCONFIG_NO_COLLATION 144 #endif // __COLLATIONSETS_H__ 145