1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2012-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * collationkeys.h 9 * 10 * created on: 2012sep02 11 * created by: Markus W. Scherer 12 */ 13 14 #ifndef __COLLATIONKEYS_H__ 15 #define __COLLATIONKEYS_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_COLLATION 20 21 #include "unicode/bytestream.h" 22 #include "unicode/ucol.h" 23 #include "charstr.h" 24 #include "collation.h" 25 26 U_NAMESPACE_BEGIN 27 28 class CollationIterator; 29 struct CollationDataReader; 30 struct CollationSettings; 31 32 class SortKeyByteSink : public ByteSink { 33 public: SortKeyByteSink(char * dest,int32_t destCapacity)34 SortKeyByteSink(char *dest, int32_t destCapacity) 35 : buffer_(dest), capacity_(destCapacity), 36 appended_(0), ignore_(0) {} 37 virtual ~SortKeyByteSink(); 38 IgnoreBytes(int32_t numIgnore)39 void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; } 40 41 virtual void Append(const char *bytes, int32_t n); Append(uint32_t b)42 void Append(uint32_t b) { 43 if (ignore_ > 0) { 44 --ignore_; 45 } else { 46 if (appended_ < capacity_ || Resize(1, appended_)) { 47 buffer_[appended_] = (char)b; 48 } 49 ++appended_; 50 } 51 } 52 virtual char *GetAppendBuffer(int32_t min_capacity, 53 int32_t desired_capacity_hint, 54 char *scratch, int32_t scratch_capacity, 55 int32_t *result_capacity); NumberOfBytesAppended()56 int32_t NumberOfBytesAppended() const { return appended_; } 57 58 /** 59 * @return how many bytes can be appended (including ignored ones) 60 * without reallocation 61 */ GetRemainingCapacity()62 int32_t GetRemainingCapacity() const { 63 // Either ignore_ or appended_ should be 0. 64 return ignore_ + capacity_ - appended_; 65 } 66 Overflowed()67 UBool Overflowed() const { return appended_ > capacity_; } 68 /** @return FALSE if memory allocation failed */ IsOk()69 UBool IsOk() const { return buffer_ != NULL; } 70 71 protected: 72 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0; 73 virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0; 74 SetNotOk()75 void SetNotOk() { 76 buffer_ = NULL; 77 capacity_ = 0; 78 } 79 80 char *buffer_; 81 int32_t capacity_; 82 int32_t appended_; 83 int32_t ignore_; 84 85 private: 86 SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented 87 SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented 88 }; 89 90 class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ { 91 public: 92 class LevelCallback : public UMemory { 93 public: 94 virtual ~LevelCallback(); 95 /** 96 * @param level The next level about to be written to the ByteSink. 97 * @return TRUE if the level is to be written 98 * (the base class implementation always returns TRUE) 99 */ 100 virtual UBool needToWrite(Collation::Level level); 101 }; 102 103 /** 104 * Writes the sort key bytes for minLevel up to the iterator data's strength. 105 * Optionally writes the case level. 106 * Stops writing levels when callback.needToWrite(level) returns FALSE. 107 * Separates levels with the LEVEL_SEPARATOR_BYTE 108 * but does not write a TERMINATOR_BYTE. 109 */ 110 static void writeSortKeyUpToQuaternary(CollationIterator &iter, 111 const UBool *compressibleBytes, 112 const CollationSettings &settings, 113 SortKeyByteSink &sink, 114 Collation::Level minLevel, LevelCallback &callback, 115 UBool preflight, UErrorCode &errorCode); 116 private: 117 friend struct CollationDataReader; 118 119 CollationKeys(); // no instantiation 120 121 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. 122 static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE; 123 static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; 124 static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; 125 static const int32_t SEC_COMMON_MAX_COUNT = 0x21; 126 127 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. 128 static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1; 129 static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7; 130 static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13; 131 static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; 132 133 // Case level, upperFirst: Compress up to 13 common weights as 3..15. 134 static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3; 135 static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15; 136 static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; 137 138 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. 139 static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE; 140 static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; 141 static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; 142 static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61; 143 144 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. 145 static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE; 146 static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; 147 static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; 148 static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; 149 150 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. 151 static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80; 152 static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; 153 static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; 154 static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; 155 156 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. 157 static const uint32_t QUAT_COMMON_LOW = 0x1c; 158 static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; 159 static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; 160 static const int32_t QUAT_COMMON_MAX_COUNT = 0x71; 161 // Primary weights shifted to quaternary level must be encoded with 162 // a lead byte below the common-weight compression range. 163 static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b 164 }; 165 166 U_NAMESPACE_END 167 168 #endif // !UCONFIG_NO_COLLATION 169 #endif // __COLLATIONKEYS_H__ 170