/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsets.h
*
* created on: 2013feb09
* created by: Markus W. Scherer
*/
11 
12 #ifndef __COLLATIONSETS_H__
13 #define __COLLATIONSETS_H__
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_COLLATION
18 
19 #include "unicode/uniset.h"
20 #include "collation.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 struct CollationData;
25 
26 /**
27  * Finds the set of characters and strings that sort differently in the tailoring
28  * from the base data.
29  *
30  * Every mapping in the tailoring needs to be compared to the base,
31  * because some mappings are copied for optimization, and
32  * all contractions for a character are copied if any contractions for that character
33  * are added, modified or removed.
34  *
35  * It might be simpler to re-parse the rule string, but:
36  * - That would require duplicating some of the from-rules builder code.
37  * - That would make the runtime code depend on the builder.
38  * - That would only work if we have the rule string, and we allow users to
39  *   omit the rule string from data files.
40  */
41 class TailoredSet : public UMemory {
42 public:
TailoredSet(UnicodeSet * t)43     TailoredSet(UnicodeSet *t)
44             : data(NULL), baseData(NULL),
45               tailored(t),
46               suffix(NULL),
47               errorCode(U_ZERO_ERROR) {}
48 
49     void forData(const CollationData *d, UErrorCode &errorCode);
50 
51     /**
52      * @return U_SUCCESS(errorCode) in C++, void in Java
53      * @internal only public for access by callback
54      */
55     UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
56 
57 private:
58     void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
59     void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
60     void compareContractions(UChar32 c, const UChar *p, const UChar *q);
61 
62     void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
63     void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
64     void addContractions(UChar32 c, const UChar *p);
65     void addSuffix(UChar32 c, const UnicodeString &sfx);
66     void add(UChar32 c);
67 
68     /** Prefixes are reversed in the data structure. */
setPrefix(const UnicodeString & pfx)69     void setPrefix(const UnicodeString &pfx) {
70         unreversedPrefix = pfx;
71         unreversedPrefix.reverse();
72     }
resetPrefix()73     void resetPrefix() {
74         unreversedPrefix.remove();
75     }
76 
77     const CollationData *data;
78     const CollationData *baseData;
79     UnicodeSet *tailored;
80     UnicodeString unreversedPrefix;
81     const UnicodeString *suffix;
82     UErrorCode errorCode;
83 };
84 
85 class ContractionsAndExpansions : public UMemory {
86 public:
87     class CESink : public UMemory {
88     public:
89         virtual ~CESink();
90         virtual void handleCE(int64_t ce) = 0;
91         virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
92     };
93 
ContractionsAndExpansions(UnicodeSet * con,UnicodeSet * exp,CESink * s,UBool prefixes)94     ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
95             : data(NULL),
96               contractions(con), expansions(exp),
97               sink(s),
98               addPrefixes(prefixes),
99               checkTailored(0),
100               suffix(NULL),
101               errorCode(U_ZERO_ERROR) {}
102 
103     void forData(const CollationData *d, UErrorCode &errorCode);
104     void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
105 
106     // all following: @internal, only public for access by callback
107 
108     void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
109 
110     void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
111     void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
112 
113     void addExpansions(UChar32 start, UChar32 end);
114     void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
115 
116     /** Prefixes are reversed in the data structure. */
setPrefix(const UnicodeString & pfx)117     void setPrefix(const UnicodeString &pfx) {
118         unreversedPrefix = pfx;
119         unreversedPrefix.reverse();
120     }
resetPrefix()121     void resetPrefix() {
122         unreversedPrefix.remove();
123     }
124 
125     const CollationData *data;
126     UnicodeSet *contractions;
127     UnicodeSet *expansions;
128     CESink *sink;
129     UBool addPrefixes;
130     int8_t checkTailored;  // -1: collected tailored  +1: exclude tailored
131     UnicodeSet tailored;
132     UnicodeSet ranges;
133     UnicodeString unreversedPrefix;
134     const UnicodeString *suffix;
135     int64_t ces[Collation::MAX_EXPANSION_LENGTH];
136     UErrorCode errorCode;
137 };
138 
139 U_NAMESPACE_END
140 
141 #endif  // !UCONFIG_NO_COLLATION
142 #endif  // __COLLATIONSETS_H__
143