• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2012, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  ucharstriebuilder.h
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010nov14
12 *   created by: Markus W. Scherer
13 */
14 
15 #ifndef __UCHARSTRIEBUILDER_H__
16 #define __UCHARSTRIEBUILDER_H__
17 
18 #include "unicode/utypes.h"
19 #include "unicode/stringtriebuilder.h"
20 #include "unicode/ucharstrie.h"
21 #include "unicode/unistr.h"
22 
23 /**
24  * \file
25  * \brief C++ API: Builder for icu::UCharsTrie
26  */
27 
28 U_NAMESPACE_BEGIN
29 
30 class UCharsTrieElement;
31 
32 /**
33  * Builder class for UCharsTrie.
34  *
35  * This class is not intended for public subclassing.
36  * @stable ICU 4.8
37  */
38 class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder {
39 public:
40     /**
41      * Constructs an empty builder.
42      * @param errorCode Standard ICU error code.
43      * @stable ICU 4.8
44      */
45     UCharsTrieBuilder(UErrorCode &errorCode);
46 
47     /**
48      * Destructor.
49      * @stable ICU 4.8
50      */
51     virtual ~UCharsTrieBuilder();
52 
53     /**
54      * Adds a (string, value) pair.
55      * The string must be unique.
56      * The string contents will be copied; the builder does not keep
57      * a reference to the input UnicodeString or its buffer.
58      * @param s The input string.
59      * @param value The value associated with this string.
60      * @param errorCode Standard ICU error code. Its input value must
61      *                  pass the U_SUCCESS() test, or else the function returns
62      *                  immediately. Check for U_FAILURE() on output or use with
63      *                  function chaining. (See User Guide for details.)
64      * @return *this
65      * @stable ICU 4.8
66      */
67     UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
68 
69     /**
70      * Builds a UCharsTrie for the add()ed data.
71      * Once built, no further data can be add()ed until clear() is called.
72      *
73      * This method passes ownership of the builder's internal result array to the new trie object.
74      * Another call to any build() variant will re-serialize the trie.
75      * After clear() has been called, a new array will be used as well.
76      * @param buildOption Build option, see UStringTrieBuildOption.
77      * @param errorCode Standard ICU error code. Its input value must
78      *                  pass the U_SUCCESS() test, or else the function returns
79      *                  immediately. Check for U_FAILURE() on output or use with
80      *                  function chaining. (See User Guide for details.)
81      * @return A new UCharsTrie for the add()ed data.
82      * @stable ICU 4.8
83      */
84     UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
85 
86     /**
87      * Builds a UCharsTrie for the add()ed data and UChar-serializes it.
88      * Once built, no further data can be add()ed until clear() is called.
89      *
90      * Multiple calls to buildUnicodeString() set the UnicodeStrings to the
91      * builder's same UChar array, without rebuilding.
92      * If buildUnicodeString() is called after build(), the trie will be
93      * re-serialized into a new array.
94      * If build() is called after buildUnicodeString(), the trie object will become
95      * the owner of the previously returned array.
96      * After clear() has been called, a new array will be used as well.
97      * @param buildOption Build option, see UStringTrieBuildOption.
98      * @param result A UnicodeString which will be set to the UChar-serialized
99      *               UCharsTrie for the add()ed data.
100      * @param errorCode Standard ICU error code. Its input value must
101      *                  pass the U_SUCCESS() test, or else the function returns
102      *                  immediately. Check for U_FAILURE() on output or use with
103      *                  function chaining. (See User Guide for details.)
104      * @return result
105      * @stable ICU 4.8
106      */
107     UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
108                                       UErrorCode &errorCode);
109 
110     /**
111      * Removes all (string, value) pairs.
112      * New data can then be add()ed and a new trie can be built.
113      * @return *this
114      * @stable ICU 4.8
115      */
clear()116     UCharsTrieBuilder &clear() {
        // Only the string storage and the two length counters are reset here;
        // elements, uchars and their capacity fields are not touched by this function.
117         strings.remove();
118         elementsLength=0;
119         ucharsLength=0;
120         return *this;
121     }
122 
123 private:
    // Both copy operations are declared in the private section; no definition
    // for either of them appears in this header.
124     UCharsTrieBuilder(const UCharsTrieBuilder &other);  // no copy constructor
125     UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other);  // no assignment operator
126 
    // Private helper that takes the same build option and error code as the
    // public build functions.
    // NOTE(review): presumably the shared serialization step behind build() and
    // buildUnicodeString(); the definition is not visible here -- confirm.
127     void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
128 
    // Private virtual accessors addressed by an element index i (and, where
    // present, a unit index within that element's string).
    // NOTE(review): presumably these override virtuals declared by
    // StringTrieBuilder; the base class is not visible in this header -- confirm.
129     virtual int32_t getElementStringLength(int32_t i) const;
130     virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const;
131     virtual int32_t getElementValue(int32_t i) const;
132 
133     virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const;
134 
135     virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const;
136     virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const;
137     virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const;
138 
    // Always returns TRUE for this builder.
matchNodesCanHaveValues()139     virtual UBool matchNodesCanHaveValues() const { return TRUE; }
140 
    // The next three functions each return the UCharsTrie constant of the
    // matching name, unchanged.
getMaxBranchLinearSubNodeLength()141     virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; }
getMinLinearMatch()142     virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; }
getMaxLinearMatchLength()143     virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; }
144 
    // Nested node type derived from LinearMatchNode. It is declared only when
    // U_HIDE_INTERNAL_API is not defined, whereas createLinearMatchNode() below
    // is declared unconditionally.
145 #ifndef U_HIDE_INTERNAL_API
146     class UCTLinearMatchNode : public LinearMatchNode {
147     public:
148         UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode);
149         virtual UBool operator==(const Node &other) const;
150         virtual void write(StringTrieBuilder &builder);
151     private:
        // Pointer to UChar units.
        // NOTE(review): presumably the `units` constructor argument, not owned
        // by the node; the constructor body is not visible here -- confirm.
152         const UChar *s;
153     };
154 #endif
155 
156     virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
157                                         Node *nextNode) const;
158 
    // Output helpers. All but ensureCapacity() return an int32_t.
    // NOTE(review): presumably ensureCapacity() grows the uchars buffer and the
    // write functions return the resulting ucharsLength; the definitions are
    // not visible here -- confirm against the implementation file.
159     UBool ensureCapacity(int32_t length);
160     virtual int32_t write(int32_t unit);
161     int32_t write(const UChar *s, int32_t length);
162     virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length);
163     virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
164     virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
165     virtual int32_t writeDeltaTo(int32_t jumpTarget);
166 
    // Input-side state. clear() empties `strings` and zeroes elementsLength.
    // NOTE(review): presumably `strings` holds the copied contents of every
    // add()ed string and `elements` is an array of elementsCapacity entries of
    // which the first elementsLength are in use -- confirm.
167     UnicodeString strings;
168     UCharsTrieElement *elements;
169     int32_t elementsCapacity;
170     int32_t elementsLength;
171 
172     // UChar serialization of the trie.
173     // Grows from the back: ucharsLength measures from the end of the buffer!
    // clear() zeroes ucharsLength and leaves uchars and ucharsCapacity as they are.
174     UChar *uchars;
175     int32_t ucharsCapacity;
176     int32_t ucharsLength;
177 };
178 
179 U_NAMESPACE_END
180 
181 #endif  // __UCHARSTRIEBUILDER_H__
182