// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsets.h
*
* created on: 2013feb09
* created by: Markus W. Scherer
*/
13 
14 #ifndef __COLLATIONSETS_H__
15 #define __COLLATIONSETS_H__
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "unicode/uniset.h"
22 #include "collation.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 struct CollationData;
27 
28 /**
29  * Finds the set of characters and strings that sort differently in the tailoring
30  * from the base data.
31  *
32  * Every mapping in the tailoring needs to be compared to the base,
33  * because some mappings are copied for optimization, and
34  * all contractions for a character are copied if any contractions for that character
35  * are added, modified or removed.
36  *
37  * It might be simpler to re-parse the rule string, but:
38  * - That would require duplicating some of the from-rules builder code.
39  * - That would make the runtime code depend on the builder.
40  * - That would only work if we have the rule string, and we allow users to
41  *   omit the rule string from data files.
42  */
43 class TailoredSet : public UMemory {
44 public:
TailoredSet(UnicodeSet * t)45     TailoredSet(UnicodeSet *t)
46             : data(NULL), baseData(NULL),
47               tailored(t),
48               suffix(NULL),
49               errorCode(U_ZERO_ERROR) {}
50 
51     void forData(const CollationData *d, UErrorCode &errorCode);
52 
53     /**
54      * @return U_SUCCESS(errorCode) in C++, void in Java
55      * @internal only public for access by callback
56      */
57     UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
58 
59 private:
60     void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
61     void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
62     void compareContractions(UChar32 c, const UChar *p, const UChar *q);
63 
64     void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
65     void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
66     void addContractions(UChar32 c, const UChar *p);
67     void addSuffix(UChar32 c, const UnicodeString &sfx);
68     void add(UChar32 c);
69 
70     /** Prefixes are reversed in the data structure. */
setPrefix(const UnicodeString & pfx)71     void setPrefix(const UnicodeString &pfx) {
72         unreversedPrefix = pfx;
73         unreversedPrefix.reverse();
74     }
resetPrefix()75     void resetPrefix() {
76         unreversedPrefix.remove();
77     }
78 
79     const CollationData *data;
80     const CollationData *baseData;
81     UnicodeSet *tailored;
82     UnicodeString unreversedPrefix;
83     const UnicodeString *suffix;
84     UErrorCode errorCode;
85 };
86 
87 class ContractionsAndExpansions : public UMemory {
88 public:
89     class CESink : public UMemory {
90     public:
91         virtual ~CESink();
92         virtual void handleCE(int64_t ce) = 0;
93         virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
94     };
95 
ContractionsAndExpansions(UnicodeSet * con,UnicodeSet * exp,CESink * s,UBool prefixes)96     ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
97             : data(NULL),
98               contractions(con), expansions(exp),
99               sink(s),
100               addPrefixes(prefixes),
101               checkTailored(0),
102               suffix(NULL),
103               errorCode(U_ZERO_ERROR) {}
104 
105     void forData(const CollationData *d, UErrorCode &errorCode);
106     void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
107 
108     // all following: @internal, only public for access by callback
109 
110     void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
111 
112     void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
113     void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
114 
115     void addExpansions(UChar32 start, UChar32 end);
116     void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
117 
118     /** Prefixes are reversed in the data structure. */
setPrefix(const UnicodeString & pfx)119     void setPrefix(const UnicodeString &pfx) {
120         unreversedPrefix = pfx;
121         unreversedPrefix.reverse();
122     }
resetPrefix()123     void resetPrefix() {
124         unreversedPrefix.remove();
125     }
126 
127     const CollationData *data;
128     UnicodeSet *contractions;
129     UnicodeSet *expansions;
130     CESink *sink;
131     UBool addPrefixes;
132     int8_t checkTailored;  // -1: collected tailored  +1: exclude tailored
133     UnicodeSet tailored;
134     UnicodeSet ranges;
135     UnicodeString unreversedPrefix;
136     const UnicodeString *suffix;
137     int64_t ces[Collation::MAX_EXPANSION_LENGTH];
138     UErrorCode errorCode;
139 };
140 
141 U_NAMESPACE_END
142 
143 #endif  // !UCONFIG_NO_COLLATION
144 #endif  // __COLLATIONSETS_H__
145