1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: ucharstriebuilder.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010nov14 12 * created by: Markus W. Scherer 13 */ 14 15 #ifndef __UCHARSTRIEBUILDER_H__ 16 #define __UCHARSTRIEBUILDER_H__ 17 18 #include "unicode/utypes.h" 19 #include "unicode/stringtriebuilder.h" 20 #include "unicode/ucharstrie.h" 21 #include "unicode/unistr.h" 22 23 U_NAMESPACE_BEGIN 24 25 class UCharsTrieElement; 26 27 /** 28 * Builder class for UCharsTrie. 29 * 30 * This class is not intended for public subclassing. 31 * @draft ICU 4.8 32 */ 33 class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder { 34 public: 35 /** 36 * Constructs an empty builder. 37 * @param errorCode Standard ICU error code. 38 * @draft ICU 4.8 39 */ 40 UCharsTrieBuilder(UErrorCode &errorCode); 41 42 /** 43 * Destructor. 44 * @draft ICU 4.8 45 */ 46 virtual ~UCharsTrieBuilder(); 47 48 /** 49 * Adds a (string, value) pair. 50 * The string must be unique. 51 * The string contents will be copied; the builder does not keep 52 * a reference to the input UnicodeString or its buffer. 53 * @param s The input string. 54 * @param value The value associated with this string. 55 * @param errorCode Standard ICU error code. Its input value must 56 * pass the U_SUCCESS() test, or else the function returns 57 * immediately. Check for U_FAILURE() on output or use with 58 * function chaining. (See User Guide for details.) 59 * @return *this 60 * @draft ICU 4.8 61 */ 62 UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode); 63 64 /** 65 * Builds a UCharsTrie for the add()ed data. 66 * Once built, no further data can be add()ed until clear() is called. 67 * 68 * This method passes ownership of the builder's internal result array to the new trie object. 69 * Another call to any build() variant will re-serialize the trie. 70 * After clear() has been called, a new array will be used as well. 71 * @param buildOption Build option, see UStringTrieBuildOption. 72 * @param errorCode Standard ICU error code. Its input value must 73 * pass the U_SUCCESS() test, or else the function returns 74 * immediately. Check for U_FAILURE() on output or use with 75 * function chaining. (See User Guide for details.) 76 * @return A new UCharsTrie for the add()ed data. 77 * @draft ICU 4.8 78 */ 79 UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 80 81 /** 82 * Builds a UCharsTrie for the add()ed data and UChar-serializes it. 83 * Once built, no further data can be add()ed until clear() is called. 84 * 85 * Multiple calls to buildUnicodeString() set the UnicodeStrings to the 86 * builder's same UChar array, without rebuilding. 87 * If buildUnicodeString() is called after build(), the trie will be 88 * re-serialized into a new array. 89 * If build() is called after buildUnicodeString(), the trie object will become 90 * the owner of the previously returned array. 91 * After clear() has been called, a new array will be used as well. 92 * @param buildOption Build option, see UStringTrieBuildOption. 93 * @param result A UnicodeString which will be set to the UChar-serialized 94 * UCharsTrie for the add()ed data. 95 * @param errorCode Standard ICU error code. Its input value must 96 * pass the U_SUCCESS() test, or else the function returns 97 * immediately. Check for U_FAILURE() on output or use with 98 * function chaining. (See User Guide for details.) 99 * @return result 100 * @draft ICU 4.8 101 */ 102 UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result, 103 UErrorCode &errorCode); 104 105 /** 106 * Removes all (string, value) pairs. 107 * New data can then be add()ed and a new trie can be built. 108 * @return *this 109 * @draft ICU 4.8 110 */ clear()111 UCharsTrieBuilder &clear() { 112 strings.remove(); 113 elementsLength=0; 114 ucharsLength=0; 115 return *this; 116 } 117 118 private: 119 UCharsTrieBuilder(const UCharsTrieBuilder &other); // no copy constructor 120 UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other); // no assignment operator 121 122 void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 123 124 virtual int32_t getElementStringLength(int32_t i) const; 125 virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const; 126 virtual int32_t getElementValue(int32_t i) const; 127 128 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const; 129 130 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const; 131 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const; 132 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const; 133 matchNodesCanHaveValues()134 virtual UBool matchNodesCanHaveValues() const { return TRUE; } 135 getMaxBranchLinearSubNodeLength()136 virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; } getMinLinearMatch()137 virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; } getMaxLinearMatchLength()138 virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; } 139 140 class UCTLinearMatchNode : public LinearMatchNode { 141 public: 142 UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode); 143 virtual UBool operator==(const Node &other) const; 144 virtual void write(StringTrieBuilder &builder); 145 private: 146 const UChar *s; 147 }; 148 149 virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, 150 Node *nextNode) const; 151 152 UBool ensureCapacity(int32_t length); 153 virtual int32_t write(int32_t unit); 154 int32_t write(const UChar *s, int32_t length); 155 virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length); 156 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); 157 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); 158 virtual int32_t writeDeltaTo(int32_t jumpTarget); 159 160 UnicodeString strings; 161 UCharsTrieElement *elements; 162 int32_t elementsCapacity; 163 int32_t elementsLength; 164 165 // UChar serialization of the trie. 166 // Grows from the back: ucharsLength measures from the end of the buffer! 167 UChar *uchars; 168 int32_t ucharsCapacity; 169 int32_t ucharsLength; 170 }; 171 172 U_NAMESPACE_END 173 174 #endif // __UCHARSTRIEBUILDER_H__ 175