1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2014 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/stsearch.h"
15 #include "usrchimp.h"
16 #include "cmemory.h"
17
18 U_NAMESPACE_BEGIN
19
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21
22 // public constructors and destructors -----------------------------------
23
24 StringSearch::StringSearch(const UnicodeString &pattern,
25 const UnicodeString &text,
26 const Locale &locale,
27 BreakIterator *breakiter,
28 UErrorCode &status) :
29 SearchIterator(text, breakiter),
30 m_pattern_(pattern)
31 {
32 if (U_FAILURE(status)) {
33 m_strsrch_ = NULL;
34 return;
35 }
36
37 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
38 m_text_.getBuffer(), m_text_.length(),
39 locale.getName(), (UBreakIterator *)breakiter,
40 &status);
41 uprv_free(m_search_);
42 m_search_ = NULL;
43
44 if (U_SUCCESS(status)) {
45 // m_search_ has been created by the base SearchIterator class
46 m_search_ = m_strsrch_->search;
47 }
48 }
49
StringSearch(const UnicodeString & pattern,const UnicodeString & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)50 StringSearch::StringSearch(const UnicodeString &pattern,
51 const UnicodeString &text,
52 RuleBasedCollator *coll,
53 BreakIterator *breakiter,
54 UErrorCode &status) :
55 SearchIterator(text, breakiter),
56 m_pattern_(pattern)
57 {
58 if (U_FAILURE(status)) {
59 m_strsrch_ = NULL;
60 return;
61 }
62 if (coll == NULL) {
63 status = U_ILLEGAL_ARGUMENT_ERROR;
64 m_strsrch_ = NULL;
65 return;
66 }
67 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
68 m_pattern_.length(),
69 m_text_.getBuffer(),
70 m_text_.length(), coll->toUCollator(),
71 (UBreakIterator *)breakiter,
72 &status);
73 uprv_free(m_search_);
74 m_search_ = NULL;
75
76 if (U_SUCCESS(status)) {
77 // m_search_ has been created by the base SearchIterator class
78 m_search_ = m_strsrch_->search;
79 }
80 }
81
StringSearch(const UnicodeString & pattern,CharacterIterator & text,const Locale & locale,BreakIterator * breakiter,UErrorCode & status)82 StringSearch::StringSearch(const UnicodeString &pattern,
83 CharacterIterator &text,
84 const Locale &locale,
85 BreakIterator *breakiter,
86 UErrorCode &status) :
87 SearchIterator(text, breakiter),
88 m_pattern_(pattern)
89 {
90 if (U_FAILURE(status)) {
91 m_strsrch_ = NULL;
92 return;
93 }
94 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
95 m_text_.getBuffer(), m_text_.length(),
96 locale.getName(), (UBreakIterator *)breakiter,
97 &status);
98 uprv_free(m_search_);
99 m_search_ = NULL;
100
101 if (U_SUCCESS(status)) {
102 // m_search_ has been created by the base SearchIterator class
103 m_search_ = m_strsrch_->search;
104 }
105 }
106
StringSearch(const UnicodeString & pattern,CharacterIterator & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)107 StringSearch::StringSearch(const UnicodeString &pattern,
108 CharacterIterator &text,
109 RuleBasedCollator *coll,
110 BreakIterator *breakiter,
111 UErrorCode &status) :
112 SearchIterator(text, breakiter),
113 m_pattern_(pattern)
114 {
115 if (U_FAILURE(status)) {
116 m_strsrch_ = NULL;
117 return;
118 }
119 if (coll == NULL) {
120 status = U_ILLEGAL_ARGUMENT_ERROR;
121 m_strsrch_ = NULL;
122 return;
123 }
124 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
125 m_pattern_.length(),
126 m_text_.getBuffer(),
127 m_text_.length(), coll->toUCollator(),
128 (UBreakIterator *)breakiter,
129 &status);
130 uprv_free(m_search_);
131 m_search_ = NULL;
132
133 if (U_SUCCESS(status)) {
134 // m_search_ has been created by the base SearchIterator class
135 m_search_ = m_strsrch_->search;
136 }
137 }
138
/**
 * Copy constructor. Copies the text, break iterator pointer and pattern of
 * <code>that</code> and opens a fresh search over them with the collator
 * held by <code>that</code>.
 *
 * If <code>that</code> has no open search (m_strsrch_ is NULL), or the open
 * fails, the new object is left with m_strsrch_ and/or m_search_ set to NULL.
 *
 * @param that object to copy
 */
StringSearch::StringSearch(const StringSearch &that) :
    SearchIterator(that.m_text_, that.m_breakiterator_),
    m_pattern_(that.m_pattern_)
{
    // Release the m_search_ that came from the superclass.
    uprv_free(m_search_);
    m_search_  = NULL;
    m_strsrch_ = NULL;

    if (that.m_strsrch_ == NULL) {
        // The source was not a good object; there is nothing to open.
        return;
    }

    // Deep copy: open an independent search over our own text and pattern.
    UErrorCode openStatus = U_ZERO_ERROR;
    UBreakIterator *cBreakIter =
        reinterpret_cast<UBreakIterator *>(that.m_breakiterator_);
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          cBreakIter,
                                          &openStatus);
    if (U_FAILURE(openStatus)) {
        return;
    }
    m_search_ = m_strsrch_->search;
}
168
~StringSearch()169 StringSearch::~StringSearch()
170 {
171 if (m_strsrch_ != NULL) {
172 usearch_close(m_strsrch_);
173 m_search_ = NULL;
174 }
175 }
176
177 StringSearch *
clone() const178 StringSearch::clone() const {
179 return new StringSearch(*this);
180 }
181
182 // operator overloading ---------------------------------------------
/**
 * Assignment operator. Copies the text, break iterator pointer and pattern
 * of <code>that</code>, closes the search currently held by this object and
 * opens a new one with the collator held by <code>that</code>.
 *
 * If <code>that</code> has no open search, or the new search cannot be
 * opened, this object is left with m_strsrch_ and m_search_ both NULL.
 *
 * @param that object to copy from
 * @return a reference to this object
 */
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    // Guard on identity rather than on value: comparing with operator!=
    // runs operator==, which dereferences m_strsrch_ on both objects.
    if (this == &that) {
        return *this;
    }

    m_text_          = that.m_text_;
    m_breakiterator_ = that.m_breakiterator_;
    m_pattern_       = that.m_pattern_;

    // Drop the old search state before building the new one.
    if (m_strsrch_ != NULL) {
        // m_search_ in the parent class is linked up with m_strsrch_
        usearch_close(m_strsrch_);
        m_strsrch_ = NULL;
    }
    else if (m_search_ != NULL) {
        // No search was ever opened, so m_search_ is still the one created
        // by the base SearchIterator class; release it as the constructors do.
        uprv_free(m_search_);
    }
    // Never leave m_search_ pointing at released memory.
    m_search_ = NULL;

    if (that.m_strsrch_ == NULL) {
        // The source holds no search (and thus no collator) to copy.
        return *this;
    }

    UErrorCode status = U_ZERO_ERROR;
    // NOTE(review): the copy constructor passes that.m_breakiterator_ here,
    // whereas assignment passes NULL even though m_breakiterator_ is copied
    // above. Kept as is to preserve existing behavior -- confirm whether the
    // difference is intentional.
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          NULL, &status);
    // Check null pointer
    if (m_strsrch_ != NULL) {
        m_search_ = m_strsrch_->search;
    }
    return *this;
}
205
operator ==(const SearchIterator & that) const206 UBool StringSearch::operator==(const SearchIterator &that) const
207 {
208 if (this == &that) {
209 return TRUE;
210 }
211 if (SearchIterator::operator ==(that)) {
212 StringSearch &thatsrch = (StringSearch &)that;
213 return (this->m_pattern_ == thatsrch.m_pattern_ &&
214 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
215 }
216 return FALSE;
217 }
218
219 // public get and set methods ----------------------------------------
220
setOffset(int32_t position,UErrorCode & status)221 void StringSearch::setOffset(int32_t position, UErrorCode &status)
222 {
223 // status checked in usearch_setOffset
224 usearch_setOffset(m_strsrch_, position, &status);
225 }
226
getOffset(void) const227 int32_t StringSearch::getOffset(void) const
228 {
229 return usearch_getOffset(m_strsrch_);
230 }
231
setText(const UnicodeString & text,UErrorCode & status)232 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
233 {
234 if (U_SUCCESS(status)) {
235 m_text_ = text;
236 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
237 }
238 }
239
setText(CharacterIterator & text,UErrorCode & status)240 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
241 {
242 if (U_SUCCESS(status)) {
243 text.getText(m_text_);
244 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
245 }
246 }
247
getCollator() const248 RuleBasedCollator * StringSearch::getCollator() const
249 {
250 // Note the const_cast. It would be cleaner if this const method returned a const collator.
251 return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
252 }
253
setCollator(RuleBasedCollator * coll,UErrorCode & status)254 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
255 {
256 if (U_SUCCESS(status)) {
257 usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
258 }
259 }
260
setPattern(const UnicodeString & pattern,UErrorCode & status)261 void StringSearch::setPattern(const UnicodeString &pattern,
262 UErrorCode &status)
263 {
264 if (U_SUCCESS(status)) {
265 m_pattern_ = pattern;
266 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
267 &status);
268 }
269 }
270
getPattern() const271 const UnicodeString & StringSearch::getPattern() const
272 {
273 return m_pattern_;
274 }
275
276 // public methods ----------------------------------------------------
277
reset()278 void StringSearch::reset()
279 {
280 usearch_reset(m_strsrch_);
281 }
282
safeClone(void) const283 SearchIterator * StringSearch::safeClone(void) const
284 {
285 UErrorCode status = U_ZERO_ERROR;
286 StringSearch *result = new StringSearch(m_pattern_, m_text_,
287 getCollator(),
288 m_breakiterator_,
289 status);
290 /* test for NULL */
291 if (result == 0) {
292 status = U_MEMORY_ALLOCATION_ERROR;
293 return 0;
294 }
295 result->setOffset(getOffset(), status);
296 result->setMatchStart(m_strsrch_->search->matchedIndex);
297 result->setMatchLength(m_strsrch_->search->matchedLength);
298 if (U_FAILURE(status)) {
299 return NULL;
300 }
301 return result;
302 }
303
304 // protected method -------------------------------------------------
305
handleNext(int32_t position,UErrorCode & status)306 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
307 {
308 // values passed here are already in the pre-shift position
309 if (U_SUCCESS(status)) {
310 if (m_strsrch_->pattern.cesLength == 0) {
311 m_search_->matchedIndex =
312 m_search_->matchedIndex == USEARCH_DONE ?
313 getOffset() : m_search_->matchedIndex + 1;
314 m_search_->matchedLength = 0;
315 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
316 &status);
317 if (m_search_->matchedIndex == m_search_->textLength) {
318 m_search_->matchedIndex = USEARCH_DONE;
319 }
320 }
321 else {
322 // looking at usearch.cpp, this part is shifted out to
323 // StringSearch instead of SearchIterator because m_strsrch_ is
324 // not accessible in SearchIterator
325 #if 0
326 if (position + m_strsrch_->pattern.defaultShiftSize
327 > m_search_->textLength) {
328 setMatchNotFound();
329 return USEARCH_DONE;
330 }
331 #endif
332 if (m_search_->matchedLength <= 0) {
333 // the flipping direction issue has already been handled
334 // in next()
335 // for boundary check purposes. this will ensure that the
336 // next match will not preceed the current offset
337 // note search->matchedIndex will always be set to something
338 // in the code
339 m_search_->matchedIndex = position - 1;
340 }
341
342 ucol_setOffset(m_strsrch_->textIter, position, &status);
343
344 #if 0
345 for (;;) {
346 if (m_search_->isCanonicalMatch) {
347 // can't use exact here since extra accents are allowed.
348 usearch_handleNextCanonical(m_strsrch_, &status);
349 }
350 else {
351 usearch_handleNextExact(m_strsrch_, &status);
352 }
353 if (U_FAILURE(status)) {
354 return USEARCH_DONE;
355 }
356 if (m_breakiterator_ == NULL
357 #if !UCONFIG_NO_BREAK_ITERATION
358 ||
359 m_search_->matchedIndex == USEARCH_DONE ||
360 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
361 m_breakiterator_->isBoundary(m_search_->matchedIndex +
362 m_search_->matchedLength))
363 #endif
364 ) {
365 if (m_search_->matchedIndex == USEARCH_DONE) {
366 ucol_setOffset(m_strsrch_->textIter,
367 m_search_->textLength, &status);
368 }
369 else {
370 ucol_setOffset(m_strsrch_->textIter,
371 m_search_->matchedIndex, &status);
372 }
373 return m_search_->matchedIndex;
374 }
375 }
376 #else
377 // if m_strsrch_->breakIter is always the same as m_breakiterator_
378 // then we don't need to check the match boundaries here because
379 // usearch_handleNextXXX will already have done it.
380 if (m_search_->isCanonicalMatch) {
381 // *could* actually use exact here 'cause no extra accents allowed...
382 usearch_handleNextCanonical(m_strsrch_, &status);
383 } else {
384 usearch_handleNextExact(m_strsrch_, &status);
385 }
386
387 if (U_FAILURE(status)) {
388 return USEARCH_DONE;
389 }
390
391 if (m_search_->matchedIndex == USEARCH_DONE) {
392 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
393 } else {
394 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
395 }
396
397 return m_search_->matchedIndex;
398 #endif
399 }
400 }
401 return USEARCH_DONE;
402 }
403
handlePrev(int32_t position,UErrorCode & status)404 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
405 {
406 // values passed here are already in the pre-shift position
407 if (U_SUCCESS(status)) {
408 if (m_strsrch_->pattern.cesLength == 0) {
409 m_search_->matchedIndex =
410 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
411 m_search_->matchedIndex);
412 if (m_search_->matchedIndex == 0) {
413 setMatchNotFound();
414 }
415 else {
416 m_search_->matchedIndex --;
417 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
418 &status);
419 m_search_->matchedLength = 0;
420 }
421 }
422 else {
423 // looking at usearch.cpp, this part is shifted out to
424 // StringSearch instead of SearchIterator because m_strsrch_ is
425 // not accessible in SearchIterator
426 #if 0
427 if (!m_search_->isOverlap &&
428 position - m_strsrch_->pattern.defaultShiftSize < 0) {
429 setMatchNotFound();
430 return USEARCH_DONE;
431 }
432
433 for (;;) {
434 if (m_search_->isCanonicalMatch) {
435 // can't use exact here since extra accents are allowed.
436 usearch_handlePreviousCanonical(m_strsrch_, &status);
437 }
438 else {
439 usearch_handlePreviousExact(m_strsrch_, &status);
440 }
441 if (U_FAILURE(status)) {
442 return USEARCH_DONE;
443 }
444 if (m_breakiterator_ == NULL
445 #if !UCONFIG_NO_BREAK_ITERATION
446 ||
447 m_search_->matchedIndex == USEARCH_DONE ||
448 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
449 m_breakiterator_->isBoundary(m_search_->matchedIndex +
450 m_search_->matchedLength))
451 #endif
452 ) {
453 return m_search_->matchedIndex;
454 }
455 }
456 #else
457 ucol_setOffset(m_strsrch_->textIter, position, &status);
458
459 if (m_search_->isCanonicalMatch) {
460 // *could* use exact match here since extra accents *not* allowed!
461 usearch_handlePreviousCanonical(m_strsrch_, &status);
462 } else {
463 usearch_handlePreviousExact(m_strsrch_, &status);
464 }
465
466 if (U_FAILURE(status)) {
467 return USEARCH_DONE;
468 }
469
470 return m_search_->matchedIndex;
471 #endif
472 }
473
474 return m_search_->matchedIndex;
475 }
476 return USEARCH_DONE;
477 }
478
479 U_NAMESPACE_END
480
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
482