1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2010-2012, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytestrieiterator.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010nov03
14 * created by: Markus W. Scherer
15 */
16
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/stringpiece.h"
20 #include "charstr.h"
21 #include "uvectr32.h"
22
23 U_NAMESPACE_BEGIN
24
Iterator(const void * trieBytes,int32_t maxStringLength,UErrorCode & errorCode)25 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
26 UErrorCode &errorCode)
27 : bytes_(static_cast<const uint8_t *>(trieBytes)),
28 pos_(bytes_), initialPos_(bytes_),
29 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30 str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
31 if(U_FAILURE(errorCode)) {
32 return;
33 }
34 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
35 // a public API header for which we would want it to depend only on
36 // other public headers.
37 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
38 // via the CharString and UVector32 implementations, so this additional
39 // cost is minimal.
40 str_=new CharString();
41 stack_=new UVector32(errorCode);
42 if(U_SUCCESS(errorCode) && (str_==nullptr || stack_==nullptr)) {
43 errorCode=U_MEMORY_ALLOCATION_ERROR;
44 }
45 }
46
Iterator(const BytesTrie & trie,int32_t maxStringLength,UErrorCode & errorCode)47 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
48 UErrorCode &errorCode)
49 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
50 remainingMatchLength_(trie.remainingMatchLength_),
51 initialRemainingMatchLength_(trie.remainingMatchLength_),
52 str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
53 if(U_FAILURE(errorCode)) {
54 return;
55 }
56 str_=new CharString();
57 stack_=new UVector32(errorCode);
58 if(U_FAILURE(errorCode)) {
59 return;
60 }
61 if(str_==nullptr || stack_==nullptr) {
62 errorCode=U_MEMORY_ALLOCATION_ERROR;
63 return;
64 }
65 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
66 if(length>=0) {
67 // Pending linear-match node, append remaining bytes to str_.
68 ++length;
69 if(maxLength_>0 && length>maxLength_) {
70 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
71 }
72 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
73 pos_+=length;
74 remainingMatchLength_-=length;
75 }
76 }
77
~Iterator()78 BytesTrie::Iterator::~Iterator() {
79 delete str_;
80 delete stack_;
81 }
82
83 BytesTrie::Iterator &
reset()84 BytesTrie::Iterator::reset() {
85 pos_=initialPos_;
86 remainingMatchLength_=initialRemainingMatchLength_;
87 int32_t length=remainingMatchLength_+1; // Remaining match length.
88 if(maxLength_>0 && length>maxLength_) {
89 length=maxLength_;
90 }
91 str_->truncate(length);
92 pos_+=length;
93 remainingMatchLength_-=length;
94 stack_->setSize(0);
95 return *this;
96 }
97
98 UBool
hasNext() const99 BytesTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
100
101 UBool
next(UErrorCode & errorCode)102 BytesTrie::Iterator::next(UErrorCode &errorCode) {
103 if(U_FAILURE(errorCode)) {
104 return false;
105 }
106 const uint8_t *pos=pos_;
107 if(pos==nullptr) {
108 if(stack_->isEmpty()) {
109 return false;
110 }
111 // Pop the state off the stack and continue with the next outbound edge of
112 // the branch node.
113 int32_t stackSize=stack_->size();
114 int32_t length=stack_->elementAti(stackSize-1);
115 pos=bytes_+stack_->elementAti(stackSize-2);
116 stack_->setSize(stackSize-2);
117 str_->truncate(length&0xffff);
118 length=(int32_t)((uint32_t)length>>16);
119 if(length>1) {
120 pos=branchNext(pos, length, errorCode);
121 if(pos==nullptr) {
122 return true; // Reached a final value.
123 }
124 } else {
125 str_->append((char)*pos++, errorCode);
126 }
127 }
128 if(remainingMatchLength_>=0) {
129 // We only get here if we started in a pending linear-match node
130 // with more than maxLength remaining bytes.
131 return truncateAndStop();
132 }
133 for(;;) {
134 int32_t node=*pos++;
135 if(node>=kMinValueLead) {
136 // Deliver value for the byte sequence so far.
137 UBool isFinal=(UBool)(node&kValueIsFinal);
138 value_=readValue(pos, node>>1);
139 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
140 pos_=nullptr;
141 } else {
142 pos_=skipValue(pos, node);
143 }
144 return true;
145 }
146 if(maxLength_>0 && str_->length()==maxLength_) {
147 return truncateAndStop();
148 }
149 if(node<kMinLinearMatch) {
150 if(node==0) {
151 node=*pos++;
152 }
153 pos=branchNext(pos, node+1, errorCode);
154 if(pos==nullptr) {
155 return true; // Reached a final value.
156 }
157 } else {
158 // Linear-match node, append length bytes to str_.
159 int32_t length=node-kMinLinearMatch+1;
160 if(maxLength_>0 && str_->length()+length>maxLength_) {
161 str_->append(reinterpret_cast<const char *>(pos),
162 maxLength_-str_->length(), errorCode);
163 return truncateAndStop();
164 }
165 str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
166 pos+=length;
167 }
168 }
169 }
170
171 StringPiece
getString() const172 BytesTrie::Iterator::getString() const {
173 return str_ == nullptr ? StringPiece() : str_->toStringPiece();
174 }
175
176 UBool
truncateAndStop()177 BytesTrie::Iterator::truncateAndStop() {
178 pos_=nullptr;
179 value_=-1; // no real value for str
180 return true;
181 }
182
183 // Branch node, needs to take the first outbound edge and push state for the rest.
184 const uint8_t *
branchNext(const uint8_t * pos,int32_t length,UErrorCode & errorCode)185 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
186 while(length>kMaxBranchLinearSubNodeLength) {
187 ++pos; // ignore the comparison byte
188 // Push state for the greater-or-equal edge.
189 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
190 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
191 // Follow the less-than edge.
192 length>>=1;
193 pos=jumpByDelta(pos);
194 }
195 // List of key-value pairs where values are either final values or jump deltas.
196 // Read the first (key, value) pair.
197 uint8_t trieByte=*pos++;
198 int32_t node=*pos++;
199 UBool isFinal=(UBool)(node&kValueIsFinal);
200 int32_t value=readValue(pos, node>>1);
201 pos=skipValue(pos, node);
202 stack_->addElement((int32_t)(pos-bytes_), errorCode);
203 stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
204 str_->append((char)trieByte, errorCode);
205 if(isFinal) {
206 pos_=nullptr;
207 value_=value;
208 return nullptr;
209 } else {
210 return pos+value;
211 }
212 }
213
214 U_NAMESPACE_END
215