• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationkeys.h
*
* created on: 2012sep02
* created by: Markus W. Scherer
*/
11 
12 #ifndef __COLLATIONKEYS_H__
13 #define __COLLATIONKEYS_H__
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_COLLATION
18 
19 #include "unicode/bytestream.h"
20 #include "unicode/ucol.h"
21 #include "charstr.h"
22 #include "collation.h"
23 
24 U_NAMESPACE_BEGIN
25 
// Forward declarations: this header only names these types by reference
// or in a friend declaration, so their full definitions are not needed here.
class CollationIterator;
struct CollationDataReader;
struct CollationSettings;
29 
30 class SortKeyByteSink : public ByteSink {
31 public:
SortKeyByteSink(char * dest,int32_t destCapacity)32     SortKeyByteSink(char *dest, int32_t destCapacity)
33             : buffer_(dest), capacity_(destCapacity),
34               appended_(0), ignore_(0) {}
35     virtual ~SortKeyByteSink();
36 
IgnoreBytes(int32_t numIgnore)37     void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
38 
39     virtual void Append(const char *bytes, int32_t n);
Append(uint32_t b)40     void Append(uint32_t b) {
41         if (ignore_ > 0) {
42             --ignore_;
43         } else {
44             if (appended_ < capacity_ || Resize(1, appended_)) {
45                 buffer_[appended_] = (char)b;
46             }
47             ++appended_;
48         }
49     }
50     virtual char *GetAppendBuffer(int32_t min_capacity,
51                                   int32_t desired_capacity_hint,
52                                   char *scratch, int32_t scratch_capacity,
53                                   int32_t *result_capacity);
NumberOfBytesAppended()54     int32_t NumberOfBytesAppended() const { return appended_; }
55 
56     /**
57      * @return how many bytes can be appended (including ignored ones)
58      *         without reallocation
59      */
GetRemainingCapacity()60     int32_t GetRemainingCapacity() const {
61         // Either ignore_ or appended_ should be 0.
62         return ignore_ + capacity_ - appended_;
63     }
64 
Overflowed()65     UBool Overflowed() const { return appended_ > capacity_; }
66     /** @return FALSE if memory allocation failed */
IsOk()67     UBool IsOk() const { return buffer_ != NULL; }
68 
69 protected:
70     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
71     virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
72 
SetNotOk()73     void SetNotOk() {
74         buffer_ = NULL;
75         capacity_ = 0;
76     }
77 
78     char *buffer_;
79     int32_t capacity_;
80     int32_t appended_;
81     int32_t ignore_;
82 
83 private:
84     SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
85     SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
86 };
87 
88 class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
89 public:
90     class LevelCallback : public UMemory {
91     public:
92         virtual ~LevelCallback();
93         /**
94          * @param level The next level about to be written to the ByteSink.
95          * @return TRUE if the level is to be written
96          *         (the base class implementation always returns TRUE)
97          */
98         virtual UBool needToWrite(Collation::Level level);
99     };
100 
101     /**
102      * Writes the sort key bytes for minLevel up to the iterator data's strength.
103      * Optionally writes the case level.
104      * Stops writing levels when callback.needToWrite(level) returns FALSE.
105      * Separates levels with the LEVEL_SEPARATOR_BYTE
106      * but does not write a TERMINATOR_BYTE.
107      */
108     static void writeSortKeyUpToQuaternary(CollationIterator &iter,
109                                            const UBool *compressibleBytes,
110                                            const CollationSettings &settings,
111                                            SortKeyByteSink &sink,
112                                            Collation::Level minLevel, LevelCallback &callback,
113                                            UBool preflight, UErrorCode &errorCode);
114 private:
115     friend struct CollationDataReader;
116 
117     CollationKeys();  // no instantiation
118 
119     // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
120     static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
121     static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
122     static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
123     static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
124 
125     // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
126     static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
127     static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
128     static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
129     static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
130 
131     // Case level, upperFirst: Compress up to 13 common weights as 3..15.
132     static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
133     static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
134     static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
135 
136     // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
137     static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
138     static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
139     static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
140     static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
141 
142     // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
143     static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
144     static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
145     static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
146     static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
147 
148     // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
149     static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
150     static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
151     static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
152     static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
153 
154     // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
155     static const uint32_t QUAT_COMMON_LOW = 0x1c;
156     static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
157     static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
158     static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
159     // Primary weights shifted to quaternary level must be encoded with
160     // a lead byte below the common-weight compression range.
161     static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b
162 };
163 
164 U_NAMESPACE_END
165 
166 #endif  // !UCONFIG_NO_COLLATION
167 #endif  // __COLLATIONKEYS_H__
168