1 /*
2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: bytestrieiterator.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2010nov03
12 * created by: Markus W. Scherer
13 */
14
15 #include "unicode/utypes.h"
16 #include "unicode/bytestrie.h"
17 #include "unicode/stringpiece.h"
18 #include "charstr.h"
19 #include "uvectr32.h"
20
21 U_NAMESPACE_BEGIN
22
Iterator(const void * trieBytes,int32_t maxStringLength,UErrorCode & errorCode)23 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
24 UErrorCode &errorCode)
25 : bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
26 pos_(bytes_), initialPos_(bytes_),
27 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
28 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
29 if(U_FAILURE(errorCode)) {
30 return;
31 }
32 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
33 // a public API header for which we would want it to depend only on
34 // other public headers.
35 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
36 // via the CharString and UVector32 implementations, so this additional
37 // cost is minimal.
38 str_=new CharString();
39 stack_=new UVector32(errorCode);
40 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
41 errorCode=U_MEMORY_ALLOCATION_ERROR;
42 }
43 }
44
Iterator(const BytesTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)45 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
46 UErrorCode &errorCode)
47 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
48 remainingMatchLength_(trie.remainingMatchLength_),
49 initialRemainingMatchLength_(trie.remainingMatchLength_),
50 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
51 if(U_FAILURE(errorCode)) {
52 return;
53 }
54 str_=new CharString();
55 stack_=new UVector32(errorCode);
56 if(U_FAILURE(errorCode)) {
57 return;
58 }
59 if(str_==NULL || stack_==NULL) {
60 errorCode=U_MEMORY_ALLOCATION_ERROR;
61 return;
62 }
63 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
64 if(length>=0) {
65 // Pending linear-match node, append remaining bytes to str_.
66 ++length;
67 if(maxLength_>0 && length>maxLength_) {
68 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
69 }
70 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
71 pos_+=length;
72 remainingMatchLength_-=length;
73 }
74 }
75
~Iterator()76 BytesTrie::Iterator::~Iterator() {
77 delete str_;
78 delete stack_;
79 }
80
81 BytesTrie::Iterator &
reset()82 BytesTrie::Iterator::reset() {
83 pos_=initialPos_;
84 remainingMatchLength_=initialRemainingMatchLength_;
85 int32_t length=remainingMatchLength_+1; // Remaining match length.
86 if(maxLength_>0 && length>maxLength_) {
87 length=maxLength_;
88 }
89 str_->truncate(length);
90 pos_+=length;
91 remainingMatchLength_-=length;
92 stack_->setSize(0);
93 return *this;
94 }
95
96 UBool
hasNext() const97 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
98
99 UBool
next(UErrorCode & errorCode)100 BytesTrie::Iterator::next(UErrorCode &errorCode) {
101 if(U_FAILURE(errorCode)) {
102 return FALSE;
103 }
104 const uint8_t *pos=pos_;
105 if(pos==NULL) {
106 if(stack_->isEmpty()) {
107 return FALSE;
108 }
109 // Pop the state off the stack and continue with the next outbound edge of
110 // the branch node.
111 int32_t stackSize=stack_->size();
112 int32_t length=stack_->elementAti(stackSize-1);
113 pos=bytes_+stack_->elementAti(stackSize-2);
114 stack_->setSize(stackSize-2);
115 str_->truncate(length&0xffff);
116 length=(int32_t)((uint32_t)length>>16);
117 if(length>1) {
118 pos=branchNext(pos, length, errorCode);
119 if(pos==NULL) {
120 return TRUE; // Reached a final value.
121 }
122 } else {
123 str_->append((char)*pos++, errorCode);
124 }
125 }
126 if(remainingMatchLength_>=0) {
127 // We only get here if we started in a pending linear-match node
128 // with more than maxLength remaining bytes.
129 return truncateAndStop();
130 }
131 for(;;) {
132 int32_t node=*pos++;
133 if(node>=kMinValueLead) {
134 // Deliver value for the byte sequence so far.
135 UBool isFinal=(UBool)(node&kValueIsFinal);
136 value_=readValue(pos, node>>1);
137 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
138 pos_=NULL;
139 } else {
140 pos_=skipValue(pos, node);
141 }
142 sp_.set(str_->data(), str_->length());
143 return TRUE;
144 }
145 if(maxLength_>0 && str_->length()==maxLength_) {
146 return truncateAndStop();
147 }
148 if(node<kMinLinearMatch) {
149 if(node==0) {
150 node=*pos++;
151 }
152 pos=branchNext(pos, node+1, errorCode);
153 if(pos==NULL) {
154 return TRUE; // Reached a final value.
155 }
156 } else {
157 // Linear-match node, append length bytes to str_.
158 int32_t length=node-kMinLinearMatch+1;
159 if(maxLength_>0 && str_->length()+length>maxLength_) {
160 str_->append(reinterpret_cast<const char *>(pos),
161 maxLength_-str_->length(), errorCode);
162 return truncateAndStop();
163 }
164 str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
165 pos+=length;
166 }
167 }
168 }
169
170 UBool
truncateAndStop()171 BytesTrie::Iterator::truncateAndStop() {
172 pos_=NULL;
173 sp_.set(str_->data(), str_->length());
174 value_=-1; // no real value for str
175 return TRUE;
176 }
177
178 // Branch node, needs to take the first outbound edge and push state for the rest.
179 const uint8_t *
branchNext(const uint8_t * pos,int32_t length,UErrorCode & errorCode)180 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
181 while(length>kMaxBranchLinearSubNodeLength) {
182 ++pos; // ignore the comparison byte
183 // Push state for the greater-or-equal edge.
184 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
185 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
186 // Follow the less-than edge.
187 length>>=1;
188 pos=jumpByDelta(pos);
189 }
190 // List of key-value pairs where values are either final values or jump deltas.
191 // Read the first (key, value) pair.
192 uint8_t trieByte=*pos++;
193 int32_t node=*pos++;
194 UBool isFinal=(UBool)(node&kValueIsFinal);
195 int32_t value=readValue(pos, node>>1);
196 pos=skipValue(pos, node);
197 stack_->addElement((int32_t)(pos-bytes_), errorCode);
198 stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
199 str_->append((char)trieByte, errorCode);
200 if(isFinal) {
201 pos_=NULL;
202 sp_.set(str_->data(), str_->length());
203 value_=value;
204 return NULL;
205 } else {
206 return pos+value;
207 }
208 }
209
210 U_NAMESPACE_END
211