1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2008 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 03/22/2000 helena Creation.
7 **********************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/stsearch.h"
15 #include "usrchimp.h"
16 #include "cmemory.h"
17
18 U_NAMESPACE_BEGIN
19
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21
22 // public constructors and destructors -----------------------------------
23
24 StringSearch::StringSearch(const UnicodeString &pattern,
25 const UnicodeString &text,
26 const Locale &locale,
27 BreakIterator *breakiter,
28 UErrorCode &status) :
29 SearchIterator(text, breakiter),
30 m_collator_(),
31 m_pattern_(pattern)
32 {
33 if (U_FAILURE(status)) {
34 m_strsrch_ = NULL;
35 return;
36 }
37
38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39 m_text_.getBuffer(), m_text_.length(),
40 locale.getName(), (UBreakIterator *)breakiter,
41 &status);
42 uprv_free(m_search_);
43 m_search_ = NULL;
44
45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
46 // wrapper around the internal collator and rules, which (here) are
47 // owned by this stringsearch object. this means 1) it's destructor
48 // _should not_ delete the ucollator or rules, and 2) changes made
49 // to the exposed collator (setStrength etc) _should_ modify the
50 // ucollator. thus the collator is not a copy-on-write alias, and it
51 // needs to distinguish itself not merely from 'stand alone' colators
52 // but also from copy-on-write ones. it needs additional state, which
53 // setUCollator should set.
54
55 if (U_SUCCESS(status)) {
56 // Alias the collator
57 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
58 // m_search_ has been created by the base SearchIterator class
59 m_search_ = m_strsrch_->search;
60 }
61 }
62
StringSearch(const UnicodeString & pattern,const UnicodeString & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)63 StringSearch::StringSearch(const UnicodeString &pattern,
64 const UnicodeString &text,
65 RuleBasedCollator *coll,
66 BreakIterator *breakiter,
67 UErrorCode &status) :
68 SearchIterator(text, breakiter),
69 m_collator_(),
70 m_pattern_(pattern)
71 {
72 if (U_FAILURE(status)) {
73 m_strsrch_ = NULL;
74 return;
75 }
76 if (coll == NULL) {
77 status = U_ILLEGAL_ARGUMENT_ERROR;
78 m_strsrch_ = NULL;
79 return;
80 }
81 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
82 m_pattern_.length(),
83 m_text_.getBuffer(),
84 m_text_.length(), coll->ucollator,
85 (UBreakIterator *)breakiter,
86 &status);
87 uprv_free(m_search_);
88 m_search_ = NULL;
89
90 if (U_SUCCESS(status)) {
91 // Alias the collator
92 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
93 // m_search_ has been created by the base SearchIterator class
94 m_search_ = m_strsrch_->search;
95 }
96 }
97
StringSearch(const UnicodeString & pattern,CharacterIterator & text,const Locale & locale,BreakIterator * breakiter,UErrorCode & status)98 StringSearch::StringSearch(const UnicodeString &pattern,
99 CharacterIterator &text,
100 const Locale &locale,
101 BreakIterator *breakiter,
102 UErrorCode &status) :
103 SearchIterator(text, breakiter),
104 m_collator_(),
105 m_pattern_(pattern)
106 {
107 if (U_FAILURE(status)) {
108 m_strsrch_ = NULL;
109 return;
110 }
111 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
112 m_text_.getBuffer(), m_text_.length(),
113 locale.getName(), (UBreakIterator *)breakiter,
114 &status);
115 uprv_free(m_search_);
116 m_search_ = NULL;
117
118 if (U_SUCCESS(status)) {
119 // Alias the collator
120 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
121 // m_search_ has been created by the base SearchIterator class
122 m_search_ = m_strsrch_->search;
123 }
124 }
125
StringSearch(const UnicodeString & pattern,CharacterIterator & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)126 StringSearch::StringSearch(const UnicodeString &pattern,
127 CharacterIterator &text,
128 RuleBasedCollator *coll,
129 BreakIterator *breakiter,
130 UErrorCode &status) :
131 SearchIterator(text, breakiter),
132 m_collator_(),
133 m_pattern_(pattern)
134 {
135 if (U_FAILURE(status)) {
136 m_strsrch_ = NULL;
137 return;
138 }
139 if (coll == NULL) {
140 status = U_ILLEGAL_ARGUMENT_ERROR;
141 m_strsrch_ = NULL;
142 return;
143 }
144 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
145 m_pattern_.length(),
146 m_text_.getBuffer(),
147 m_text_.length(), coll->ucollator,
148 (UBreakIterator *)breakiter,
149 &status);
150 uprv_free(m_search_);
151 m_search_ = NULL;
152
153 if (U_SUCCESS(status)) {
154 // Alias the collator
155 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
156 // m_search_ has been created by the base SearchIterator class
157 m_search_ = m_strsrch_->search;
158 }
159 }
160
/*
 * Copy constructor. Copies the text, break iterator pointer and pattern of
 * `that`, then opens a fresh UStringSearch over those copies using the
 * collator held by that.m_strsrch_.
 *
 * There is no status parameter: if `that` has no search object, or opening
 * the new one fails, this object ends up with m_strsrch_ possibly NULL and
 * m_search_ NULL.
 */
StringSearch::StringSearch(const StringSearch &that) :
    SearchIterator(that.m_text_, that.m_breakiterator_),
    m_collator_(),
    m_pattern_(that.m_pattern_)
{
    // Release the m_search_ allocated by the superclass.
    uprv_free(m_search_);
    m_search_  = NULL;
    m_strsrch_ = NULL;

    if (that.m_strsrch_ == NULL) {
        // The source was never successfully opened; nothing to duplicate.
        return;
    }

    // Deep copy: open an independent search object over our own buffers.
    UErrorCode status = U_ZERO_ERROR;
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          (UBreakIterator *)that.m_breakiterator_,
                                          &status);
    if (U_FAILURE(status)) {
        return;
    }

    // Make m_collator_ an alias of the collator held by the search object,
    // and share its search state with the base class.
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    m_search_ = m_strsrch_->search;
}
193
~StringSearch()194 StringSearch::~StringSearch()
195 {
196 if (m_strsrch_ != NULL) {
197 usearch_close(m_strsrch_);
198 m_search_ = NULL;
199 }
200 }
201
202 StringSearch *
clone() const203 StringSearch::clone() const {
204 return new StringSearch(*this);
205 }
206
207 // operator overloading ---------------------------------------------
/*
 * Assignment. Copies text, break iterator pointer and pattern from `that`
 * and reopens the underlying UStringSearch with the collator held by
 * that.m_strsrch_.
 *
 * Nothing is done when `that` is this very object, or when both objects have
 * an open search and already compare equal.
 *
 * There is no status parameter: if `that` has no search object, or reopening
 * fails, this object is left with m_strsrch_ and m_search_ both NULL.
 */
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    if (this == &that) {
        return *this;
    }
    // The value comparison dereferences the search objects of both sides,
    // so it may only be used when both of them exist.
    if (m_strsrch_ != NULL && that.m_strsrch_ != NULL &&
        !((*this) != that)) {
        return *this;
    }

    // Release the search state currently held, mirroring the destructor:
    // with an open search object m_search_ points into it and is released
    // by usearch_close; without one, m_search_ (if any) is still the
    // block obtained from the base class.
    if (m_strsrch_ != NULL) {
        usearch_close(m_strsrch_);
    }
    else if (m_search_ != NULL) {
        uprv_free(m_search_);
    }
    m_strsrch_ = NULL;
    m_search_  = NULL;

    m_text_          = that.m_text_;
    m_breakiterator_ = that.m_breakiterator_;
    m_pattern_       = that.m_pattern_;

    if (that.m_strsrch_ == NULL) {
        // The source was never successfully opened; nothing to reopen.
        return *this;
    }

    UErrorCode status = U_ZERO_ERROR;
    // NOTE(review): no break iterator is passed here even though
    // m_breakiterator_ was just copied, unlike the copy constructor which
    // forwards that.m_breakiterator_. Kept as-is; confirm whether this is
    // intended.
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          NULL, &status);
    if (m_strsrch_ != NULL) {
        // Make m_collator_ an alias of the collator held by the search
        // object, and link the base class state to the new search object.
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
        m_search_ = m_strsrch_->search;
    }
    return *this;
}
232
operator ==(const SearchIterator & that) const233 UBool StringSearch::operator==(const SearchIterator &that) const
234 {
235 if (this == &that) {
236 return TRUE;
237 }
238 if (SearchIterator::operator ==(that)) {
239 StringSearch &thatsrch = (StringSearch &)that;
240 return (this->m_pattern_ == thatsrch.m_pattern_ &&
241 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
242 }
243 return FALSE;
244 }
245
246 // public get and set methods ----------------------------------------
247
setOffset(int32_t position,UErrorCode & status)248 void StringSearch::setOffset(int32_t position, UErrorCode &status)
249 {
250 // status checked in usearch_setOffset
251 usearch_setOffset(m_strsrch_, position, &status);
252 }
253
getOffset(void) const254 int32_t StringSearch::getOffset(void) const
255 {
256 return usearch_getOffset(m_strsrch_);
257 }
258
setText(const UnicodeString & text,UErrorCode & status)259 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
260 {
261 if (U_SUCCESS(status)) {
262 m_text_ = text;
263 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
264 }
265 }
266
setText(CharacterIterator & text,UErrorCode & status)267 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
268 {
269 if (U_SUCCESS(status)) {
270 text.getText(m_text_);
271 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
272 }
273 }
274
getCollator() const275 RuleBasedCollator * StringSearch::getCollator() const
276 {
277 return (RuleBasedCollator *)&m_collator_;
278 }
279
setCollator(RuleBasedCollator * coll,UErrorCode & status)280 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
281 {
282 if (U_SUCCESS(status)) {
283 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
284 // Alias the collator
285 m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
286 }
287 }
288
setPattern(const UnicodeString & pattern,UErrorCode & status)289 void StringSearch::setPattern(const UnicodeString &pattern,
290 UErrorCode &status)
291 {
292 if (U_SUCCESS(status)) {
293 m_pattern_ = pattern;
294 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
295 &status);
296 }
297 }
298
getPattern() const299 const UnicodeString & StringSearch::getPattern() const
300 {
301 return m_pattern_;
302 }
303
304 // public methods ----------------------------------------------------
305
reset()306 void StringSearch::reset()
307 {
308 usearch_reset(m_strsrch_);
309 }
310
safeClone(void) const311 SearchIterator * StringSearch::safeClone(void) const
312 {
313 UErrorCode status = U_ZERO_ERROR;
314 StringSearch *result = new StringSearch(m_pattern_, m_text_,
315 (RuleBasedCollator *)&m_collator_,
316 m_breakiterator_,
317 status);
318 /* test for NULL */
319 if (result == 0) {
320 status = U_MEMORY_ALLOCATION_ERROR;
321 return 0;
322 }
323 result->setOffset(getOffset(), status);
324 result->setMatchStart(m_strsrch_->search->matchedIndex);
325 result->setMatchLength(m_strsrch_->search->matchedLength);
326 if (U_FAILURE(status)) {
327 return NULL;
328 }
329 return result;
330 }
331
332 // protected method -------------------------------------------------
333
handleNext(int32_t position,UErrorCode & status)334 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
335 {
336 // values passed here are already in the pre-shift position
337 if (U_SUCCESS(status)) {
338 if (m_strsrch_->pattern.CELength == 0) {
339 m_search_->matchedIndex =
340 m_search_->matchedIndex == USEARCH_DONE ?
341 getOffset() : m_search_->matchedIndex + 1;
342 m_search_->matchedLength = 0;
343 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
344 &status);
345 if (m_search_->matchedIndex == m_search_->textLength) {
346 m_search_->matchedIndex = USEARCH_DONE;
347 }
348 }
349 else {
350 // looking at usearch.cpp, this part is shifted out to
351 // StringSearch instead of SearchIterator because m_strsrch_ is
352 // not accessible in SearchIterator
353 #if 0
354 if (position + m_strsrch_->pattern.defaultShiftSize
355 > m_search_->textLength) {
356 setMatchNotFound();
357 return USEARCH_DONE;
358 }
359 #endif
360 if (m_search_->matchedLength <= 0) {
361 // the flipping direction issue has already been handled
362 // in next()
363 // for boundary check purposes. this will ensure that the
364 // next match will not preceed the current offset
365 // note search->matchedIndex will always be set to something
366 // in the code
367 m_search_->matchedIndex = position - 1;
368 }
369
370 ucol_setOffset(m_strsrch_->textIter, position, &status);
371
372 #if 0
373 for (;;) {
374 if (m_search_->isCanonicalMatch) {
375 // can't use exact here since extra accents are allowed.
376 usearch_handleNextCanonical(m_strsrch_, &status);
377 }
378 else {
379 usearch_handleNextExact(m_strsrch_, &status);
380 }
381 if (U_FAILURE(status)) {
382 return USEARCH_DONE;
383 }
384 if (m_breakiterator_ == NULL
385 #if !UCONFIG_NO_BREAK_ITERATION
386 ||
387 m_search_->matchedIndex == USEARCH_DONE ||
388 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
389 m_breakiterator_->isBoundary(m_search_->matchedIndex +
390 m_search_->matchedLength))
391 #endif
392 ) {
393 if (m_search_->matchedIndex == USEARCH_DONE) {
394 ucol_setOffset(m_strsrch_->textIter,
395 m_search_->textLength, &status);
396 }
397 else {
398 ucol_setOffset(m_strsrch_->textIter,
399 m_search_->matchedIndex, &status);
400 }
401 return m_search_->matchedIndex;
402 }
403 }
404 #else
405 // if m_strsrch_->breakIter is always the same as m_breakiterator_
406 // then we don't need to check the match boundaries here because
407 // usearch_handleNextXXX will already have done it.
408 if (m_search_->isCanonicalMatch) {
409 // *could* actually use exact here 'cause no extra accents allowed...
410 usearch_handleNextCanonical(m_strsrch_, &status);
411 } else {
412 usearch_handleNextExact(m_strsrch_, &status);
413 }
414
415 if (U_FAILURE(status)) {
416 return USEARCH_DONE;
417 }
418
419 if (m_search_->matchedIndex == USEARCH_DONE) {
420 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
421 } else {
422 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
423 }
424
425 return m_search_->matchedIndex;
426 #endif
427 }
428 }
429 return USEARCH_DONE;
430 }
431
handlePrev(int32_t position,UErrorCode & status)432 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
433 {
434 // values passed here are already in the pre-shift position
435 if (U_SUCCESS(status)) {
436 if (m_strsrch_->pattern.CELength == 0) {
437 m_search_->matchedIndex =
438 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
439 m_search_->matchedIndex);
440 if (m_search_->matchedIndex == 0) {
441 setMatchNotFound();
442 }
443 else {
444 m_search_->matchedIndex --;
445 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
446 &status);
447 m_search_->matchedLength = 0;
448 }
449 }
450 else {
451 // looking at usearch.cpp, this part is shifted out to
452 // StringSearch instead of SearchIterator because m_strsrch_ is
453 // not accessible in SearchIterator
454 #if 0
455 if (!m_search_->isOverlap &&
456 position - m_strsrch_->pattern.defaultShiftSize < 0) {
457 setMatchNotFound();
458 return USEARCH_DONE;
459 }
460
461 for (;;) {
462 if (m_search_->isCanonicalMatch) {
463 // can't use exact here since extra accents are allowed.
464 usearch_handlePreviousCanonical(m_strsrch_, &status);
465 }
466 else {
467 usearch_handlePreviousExact(m_strsrch_, &status);
468 }
469 if (U_FAILURE(status)) {
470 return USEARCH_DONE;
471 }
472 if (m_breakiterator_ == NULL
473 #if !UCONFIG_NO_BREAK_ITERATION
474 ||
475 m_search_->matchedIndex == USEARCH_DONE ||
476 (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
477 m_breakiterator_->isBoundary(m_search_->matchedIndex +
478 m_search_->matchedLength))
479 #endif
480 ) {
481 return m_search_->matchedIndex;
482 }
483 }
484 #else
485 ucol_setOffset(m_strsrch_->textIter, position, &status);
486
487 if (m_search_->isCanonicalMatch) {
488 // *could* use exact match here since extra accents *not* allowed!
489 usearch_handlePreviousCanonical(m_strsrch_, &status);
490 } else {
491 usearch_handlePreviousExact(m_strsrch_, &status);
492 }
493
494 if (U_FAILURE(status)) {
495 return USEARCH_DONE;
496 }
497
498 return m_search_->matchedIndex;
499 #endif
500 }
501
502 return m_search_->matchedIndex;
503 }
504 return USEARCH_DONE;
505 }
506
507 U_NAMESPACE_END
508
#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
510