1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucharstrieiterator.h
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010nov15
12 * created by: Markus W. Scherer
13 */
14
15 #include "unicode/utypes.h"
16 #include "unicode/ucharstrie.h"
17 #include "unicode/unistr.h"
18 #include "uvectr32.h"
19
20 U_NAMESPACE_BEGIN
21
Iterator(const UChar * trieUChars,int32_t maxStringLength,UErrorCode & errorCode)22 UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength,
23 UErrorCode &errorCode)
24 : uchars_(trieUChars),
25 pos_(uchars_), initialPos_(uchars_),
26 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
27 skipValue_(FALSE),
28 maxLength_(maxStringLength), value_(0), stack_(NULL) {
29 if(U_FAILURE(errorCode)) {
30 return;
31 }
32 // stack_ is a pointer so that it's easy to turn ucharstrie.h into
33 // a public API header for which we would want it to depend only on
34 // other public headers.
35 // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
36 // via the UnicodeString and UVector32 implementations, so this additional
37 // cost is minimal.
38 stack_=new UVector32(errorCode);
39 if(stack_==NULL) {
40 errorCode=U_MEMORY_ALLOCATION_ERROR;
41 }
42 }
43
Iterator(const UCharsTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)44 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
45 UErrorCode &errorCode)
46 : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
47 remainingMatchLength_(trie.remainingMatchLength_),
48 initialRemainingMatchLength_(trie.remainingMatchLength_),
49 skipValue_(FALSE),
50 maxLength_(maxStringLength), value_(0), stack_(NULL) {
51 if(U_FAILURE(errorCode)) {
52 return;
53 }
54 stack_=new UVector32(errorCode);
55 if(U_FAILURE(errorCode)) {
56 return;
57 }
58 if(stack_==NULL) {
59 errorCode=U_MEMORY_ALLOCATION_ERROR;
60 return;
61 }
62 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
63 if(length>=0) {
64 // Pending linear-match node, append remaining UChars to str_.
65 ++length;
66 if(maxLength_>0 && length>maxLength_) {
67 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
68 }
69 str_.append(pos_, length);
70 pos_+=length;
71 remainingMatchLength_-=length;
72 }
73 }
74
~Iterator()75 UCharsTrie::Iterator::~Iterator() {
76 delete stack_;
77 }
78
79 UCharsTrie::Iterator &
reset()80 UCharsTrie::Iterator::reset() {
81 pos_=initialPos_;
82 remainingMatchLength_=initialRemainingMatchLength_;
83 skipValue_=FALSE;
84 int32_t length=remainingMatchLength_+1; // Remaining match length.
85 if(maxLength_>0 && length>maxLength_) {
86 length=maxLength_;
87 }
88 str_.truncate(length);
89 pos_+=length;
90 remainingMatchLength_-=length;
91 stack_->setSize(0);
92 return *this;
93 }
94
95 UBool
hasNext() const96 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
97
98 UBool
next(UErrorCode & errorCode)99 UCharsTrie::Iterator::next(UErrorCode &errorCode) {
100 if(U_FAILURE(errorCode)) {
101 return FALSE;
102 }
103 const UChar *pos=pos_;
104 if(pos==NULL) {
105 if(stack_->isEmpty()) {
106 return FALSE;
107 }
108 // Pop the state off the stack and continue with the next outbound edge of
109 // the branch node.
110 int32_t stackSize=stack_->size();
111 int32_t length=stack_->elementAti(stackSize-1);
112 pos=uchars_+stack_->elementAti(stackSize-2);
113 stack_->setSize(stackSize-2);
114 str_.truncate(length&0xffff);
115 length=(int32_t)((uint32_t)length>>16);
116 if(length>1) {
117 pos=branchNext(pos, length, errorCode);
118 if(pos==NULL) {
119 return TRUE; // Reached a final value.
120 }
121 } else {
122 str_.append(*pos++);
123 }
124 }
125 if(remainingMatchLength_>=0) {
126 // We only get here if we started in a pending linear-match node
127 // with more than maxLength remaining units.
128 return truncateAndStop();
129 }
130 for(;;) {
131 int32_t node=*pos++;
132 if(node>=kMinValueLead) {
133 if(skipValue_) {
134 pos=skipNodeValue(pos, node);
135 node&=kNodeTypeMask;
136 skipValue_=FALSE;
137 } else {
138 // Deliver value for the string so far.
139 UBool isFinal=(UBool)(node>>15);
140 if(isFinal) {
141 value_=readValue(pos, node&0x7fff);
142 } else {
143 value_=readNodeValue(pos, node);
144 }
145 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
146 pos_=NULL;
147 } else {
148 // We cannot skip the value right here because it shares its
149 // lead unit with a match node which we have to evaluate
150 // next time.
151 // Instead, keep pos_ on the node lead unit itself.
152 pos_=pos-1;
153 skipValue_=TRUE;
154 }
155 return TRUE;
156 }
157 }
158 if(maxLength_>0 && str_.length()==maxLength_) {
159 return truncateAndStop();
160 }
161 if(node<kMinLinearMatch) {
162 if(node==0) {
163 node=*pos++;
164 }
165 pos=branchNext(pos, node+1, errorCode);
166 if(pos==NULL) {
167 return TRUE; // Reached a final value.
168 }
169 } else {
170 // Linear-match node, append length units to str_.
171 int32_t length=node-kMinLinearMatch+1;
172 if(maxLength_>0 && str_.length()+length>maxLength_) {
173 str_.append(pos, maxLength_-str_.length());
174 return truncateAndStop();
175 }
176 str_.append(pos, length);
177 pos+=length;
178 }
179 }
180 }
181
182 // Branch node, needs to take the first outbound edge and push state for the rest.
183 const UChar *
branchNext(const UChar * pos,int32_t length,UErrorCode & errorCode)184 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
185 while(length>kMaxBranchLinearSubNodeLength) {
186 ++pos; // ignore the comparison unit
187 // Push state for the greater-or-equal edge.
188 stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
189 stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
190 // Follow the less-than edge.
191 length>>=1;
192 pos=jumpByDelta(pos);
193 }
194 // List of key-value pairs where values are either final values or jump deltas.
195 // Read the first (key, value) pair.
196 UChar trieUnit=*pos++;
197 int32_t node=*pos++;
198 UBool isFinal=(UBool)(node>>15);
199 int32_t value=readValue(pos, node&=0x7fff);
200 pos=skipValue(pos, node);
201 stack_->addElement((int32_t)(pos-uchars_), errorCode);
202 stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
203 str_.append(trieUnit);
204 if(isFinal) {
205 pos_=NULL;
206 value_=value;
207 return NULL;
208 } else {
209 return pos+value;
210 }
211 }
212
213 U_NAMESPACE_END
214