1 /* 2 ******************************************************************************* 3 * Copyright (C) 2012-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * collationkeys.h 7 * 8 * created on: 2012sep02 9 * created by: Markus W. Scherer 10 */ 11 12 #ifndef __COLLATIONKEYS_H__ 13 #define __COLLATIONKEYS_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_COLLATION 18 19 #include "unicode/bytestream.h" 20 #include "unicode/ucol.h" 21 #include "charstr.h" 22 #include "collation.h" 23 24 U_NAMESPACE_BEGIN 25 26 class CollationIterator; 27 struct CollationDataReader; 28 struct CollationSettings; 29 30 class SortKeyByteSink : public ByteSink { 31 public: SortKeyByteSink(char * dest,int32_t destCapacity)32 SortKeyByteSink(char *dest, int32_t destCapacity) 33 : buffer_(dest), capacity_(destCapacity), 34 appended_(0), ignore_(0) {} 35 virtual ~SortKeyByteSink(); 36 IgnoreBytes(int32_t numIgnore)37 void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } 38 39 virtual void Append(const char *bytes, int32_t n); Append(uint32_t b)40 void Append(uint32_t b) { 41 if (ignore_ > 0) { 42 --ignore_; 43 } else { 44 if (appended_ < capacity_ || Resize(1, appended_)) { 45 buffer_[appended_] = (char)b; 46 } 47 ++appended_; 48 } 49 } 50 virtual char *GetAppendBuffer(int32_t min_capacity, 51 int32_t desired_capacity_hint, 52 char *scratch, int32_t scratch_capacity, 53 int32_t *result_capacity); NumberOfBytesAppended()54 int32_t NumberOfBytesAppended() const { return appended_; } 55 56 /** 57 * @return how many bytes can be appended (including ignored ones) 58 * without reallocation 59 */ GetRemainingCapacity()60 int32_t GetRemainingCapacity() const { 61 // Either ignore_ or appended_ should be 0. 62 return ignore_ + capacity_ - appended_; 63 } 64 Overflowed()65 UBool Overflowed() const { return appended_ > capacity_; } 66 /** @return FALSE if memory allocation failed */ IsOk()67 UBool IsOk() const { return buffer_ != NULL; } 68 69 protected: 70 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0; 71 virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; 72 SetNotOk()73 void SetNotOk() { 74 buffer_ = NULL; 75 capacity_ = 0; 76 } 77 78 char *buffer_; 79 int32_t capacity_; 80 int32_t appended_; 81 int32_t ignore_; 82 83 private: 84 SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented 85 SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented 86 }; 87 88 class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ { 89 public: 90 class LevelCallback : public UMemory { 91 public: 92 virtual ~LevelCallback(); 93 /** 94 * @param level The next level about to be written to the ByteSink. 95 * @return TRUE if the level is to be written 96 * (the base class implementation always returns TRUE) 97 */ 98 virtual UBool needToWrite(Collation::Level level); 99 }; 100 101 /** 102 * Writes the sort key bytes for minLevel up to the iterator data's strength. 103 * Optionally writes the case level. 104 * Stops writing levels when callback.needToWrite(level) returns FALSE. 105 * Separates levels with the LEVEL_SEPARATOR_BYTE 106 * but does not write a TERMINATOR_BYTE. 107 */ 108 static void writeSortKeyUpToQuaternary(CollationIterator &iter, 109 const UBool *compressibleBytes, 110 const CollationSettings &settings, 111 SortKeyByteSink &sink, 112 Collation::Level minLevel, LevelCallback &callback, 113 UBool preflight, UErrorCode &errorCode); 114 private: 115 friend struct CollationDataReader; 116 117 CollationKeys(); // no instantiation 118 119 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. 120 static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE; 121 static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; 122 static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; 123 static const int32_t SEC_COMMON_MAX_COUNT = 0x21; 124 125 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. 126 static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1; 127 static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7; 128 static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13; 129 static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; 130 131 // Case level, upperFirst: Compress up to 13 common weights as 3..15. 132 static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3; 133 static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15; 134 static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; 135 136 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. 137 static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE; 138 static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; 139 static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; 140 static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61; 141 142 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. 143 static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE; 144 static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; 145 static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; 146 static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; 147 148 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. 149 static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80; 150 static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; 151 static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; 152 static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; 153 154 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. 155 static const uint32_t QUAT_COMMON_LOW = 0x1c; 156 static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; 157 static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; 158 static const int32_t QUAT_COMMON_MAX_COUNT = 0x71; 159 // Primary weights shifted to quaternary level must be encoded with 160 // a lead byte below the common-weight compression range. 161 static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b 162 }; 163 164 U_NAMESPACE_END 165 166 #endif // !UCONFIG_NO_COLLATION 167 #endif // __COLLATIONKEYS_H__ 168