• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2011, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  ucharstrieiterator.h
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010nov15
12 *   created by: Markus W. Scherer
13 */
14 
15 #include "unicode/utypes.h"
16 #include "unicode/ucharstrie.h"
17 #include "unicode/unistr.h"
18 #include "uvectr32.h"
19 
20 U_NAMESPACE_BEGIN
21 
Iterator(const UChar * trieUChars,int32_t maxStringLength,UErrorCode & errorCode)22 UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength,
23                                UErrorCode &errorCode)
24         : uchars_(trieUChars),
25           pos_(uchars_), initialPos_(uchars_),
26           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
27           skipValue_(FALSE),
28           maxLength_(maxStringLength), value_(0), stack_(NULL) {
29     if(U_FAILURE(errorCode)) {
30         return;
31     }
32     // stack_ is a pointer so that it's easy to turn ucharstrie.h into
33     // a public API header for which we would want it to depend only on
34     // other public headers.
35     // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
36     // via the UnicodeString and UVector32 implementations, so this additional
37     // cost is minimal.
38     stack_=new UVector32(errorCode);
39     if(stack_==NULL) {
40         errorCode=U_MEMORY_ALLOCATION_ERROR;
41     }
42 }
43 
Iterator(const UCharsTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)44 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
45                                UErrorCode &errorCode)
46         : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
47           remainingMatchLength_(trie.remainingMatchLength_),
48           initialRemainingMatchLength_(trie.remainingMatchLength_),
49           skipValue_(FALSE),
50           maxLength_(maxStringLength), value_(0), stack_(NULL) {
51     if(U_FAILURE(errorCode)) {
52         return;
53     }
54     stack_=new UVector32(errorCode);
55     if(U_FAILURE(errorCode)) {
56         return;
57     }
58     if(stack_==NULL) {
59         errorCode=U_MEMORY_ALLOCATION_ERROR;
60         return;
61     }
62     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
63     if(length>=0) {
64         // Pending linear-match node, append remaining UChars to str_.
65         ++length;
66         if(maxLength_>0 && length>maxLength_) {
67             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
68         }
69         str_.append(pos_, length);
70         pos_+=length;
71         remainingMatchLength_-=length;
72     }
73 }
74 
~Iterator()75 UCharsTrie::Iterator::~Iterator() {
76     delete stack_;
77 }
78 
79 UCharsTrie::Iterator &
reset()80 UCharsTrie::Iterator::reset() {
81     pos_=initialPos_;
82     remainingMatchLength_=initialRemainingMatchLength_;
83     skipValue_=FALSE;
84     int32_t length=remainingMatchLength_+1;  // Remaining match length.
85     if(maxLength_>0 && length>maxLength_) {
86         length=maxLength_;
87     }
88     str_.truncate(length);
89     pos_+=length;
90     remainingMatchLength_-=length;
91     stack_->setSize(0);
92     return *this;
93 }
94 
95 UBool
hasNext() const96 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
97 
98 UBool
next(UErrorCode & errorCode)99 UCharsTrie::Iterator::next(UErrorCode &errorCode) {
100     if(U_FAILURE(errorCode)) {
101         return FALSE;
102     }
103     const UChar *pos=pos_;
104     if(pos==NULL) {
105         if(stack_->isEmpty()) {
106             return FALSE;
107         }
108         // Pop the state off the stack and continue with the next outbound edge of
109         // the branch node.
110         int32_t stackSize=stack_->size();
111         int32_t length=stack_->elementAti(stackSize-1);
112         pos=uchars_+stack_->elementAti(stackSize-2);
113         stack_->setSize(stackSize-2);
114         str_.truncate(length&0xffff);
115         length=(int32_t)((uint32_t)length>>16);
116         if(length>1) {
117             pos=branchNext(pos, length, errorCode);
118             if(pos==NULL) {
119                 return TRUE;  // Reached a final value.
120             }
121         } else {
122             str_.append(*pos++);
123         }
124     }
125     if(remainingMatchLength_>=0) {
126         // We only get here if we started in a pending linear-match node
127         // with more than maxLength remaining units.
128         return truncateAndStop();
129     }
130     for(;;) {
131         int32_t node=*pos++;
132         if(node>=kMinValueLead) {
133             if(skipValue_) {
134                 pos=skipNodeValue(pos, node);
135                 node&=kNodeTypeMask;
136                 skipValue_=FALSE;
137             } else {
138                 // Deliver value for the string so far.
139                 UBool isFinal=(UBool)(node>>15);
140                 if(isFinal) {
141                     value_=readValue(pos, node&0x7fff);
142                 } else {
143                     value_=readNodeValue(pos, node);
144                 }
145                 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
146                     pos_=NULL;
147                 } else {
148                     // We cannot skip the value right here because it shares its
149                     // lead unit with a match node which we have to evaluate
150                     // next time.
151                     // Instead, keep pos_ on the node lead unit itself.
152                     pos_=pos-1;
153                     skipValue_=TRUE;
154                 }
155                 return TRUE;
156             }
157         }
158         if(maxLength_>0 && str_.length()==maxLength_) {
159             return truncateAndStop();
160         }
161         if(node<kMinLinearMatch) {
162             if(node==0) {
163                 node=*pos++;
164             }
165             pos=branchNext(pos, node+1, errorCode);
166             if(pos==NULL) {
167                 return TRUE;  // Reached a final value.
168             }
169         } else {
170             // Linear-match node, append length units to str_.
171             int32_t length=node-kMinLinearMatch+1;
172             if(maxLength_>0 && str_.length()+length>maxLength_) {
173                 str_.append(pos, maxLength_-str_.length());
174                 return truncateAndStop();
175             }
176             str_.append(pos, length);
177             pos+=length;
178         }
179     }
180 }
181 
182 // Branch node, needs to take the first outbound edge and push state for the rest.
183 const UChar *
branchNext(const UChar * pos,int32_t length,UErrorCode & errorCode)184 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
185     while(length>kMaxBranchLinearSubNodeLength) {
186         ++pos;  // ignore the comparison unit
187         // Push state for the greater-or-equal edge.
188         stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
189         stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
190         // Follow the less-than edge.
191         length>>=1;
192         pos=jumpByDelta(pos);
193     }
194     // List of key-value pairs where values are either final values or jump deltas.
195     // Read the first (key, value) pair.
196     UChar trieUnit=*pos++;
197     int32_t node=*pos++;
198     UBool isFinal=(UBool)(node>>15);
199     int32_t value=readValue(pos, node&=0x7fff);
200     pos=skipValue(pos, node);
201     stack_->addElement((int32_t)(pos-uchars_), errorCode);
202     stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
203     str_.append(trieUnit);
204     if(isFinal) {
205         pos_=NULL;
206         value_=value;
207         return NULL;
208     } else {
209         return pos+value;
210     }
211 }
212 
213 U_NAMESPACE_END
214