• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 **********************************************************************
3 *   Copyright (C) 2001-2008 IBM and others. All rights reserved.
4 **********************************************************************
5 *   Date        Name        Description
6 *  03/22/2000   helena      Creation.
7 **********************************************************************
8 */
9 
10 #include "unicode/utypes.h"
11 
12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
13 
14 #include "unicode/stsearch.h"
15 #include "usrchimp.h"
16 #include "cmemory.h"
17 
18 U_NAMESPACE_BEGIN
19 
// Expands the UObject RTTI boilerplate for StringSearch. The macro must be
// expanded exactly once for the class.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
21 
22 // public constructors and destructors -----------------------------------
23 
24 StringSearch::StringSearch(const UnicodeString &pattern,
25                            const UnicodeString &text,
26                            const Locale        &locale,
27                                  BreakIterator *breakiter,
28                                  UErrorCode    &status) :
29                            SearchIterator(text, breakiter),
30                            m_collator_(),
31                            m_pattern_(pattern)
32 {
33     if (U_FAILURE(status)) {
34         m_strsrch_ = NULL;
35         return;
36     }
37 
38     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
39                               m_text_.getBuffer(), m_text_.length(),
40                               locale.getName(), (UBreakIterator *)breakiter,
41                               &status);
42     uprv_free(m_search_);
43     m_search_ = NULL;
44 
45     // !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
46     // wrapper around the internal collator and rules, which (here) are
47     // owned by this stringsearch object.  this means 1) it's destructor
48     // _should not_ delete the ucollator or rules, and 2) changes made
49     // to the exposed collator (setStrength etc) _should_ modify the
50     // ucollator.  thus the collator is not a copy-on-write alias, and it
51     // needs to distinguish itself not merely from 'stand alone' colators
52     // but also from copy-on-write ones.  it needs additional state, which
53     // setUCollator should set.
54 
55     if (U_SUCCESS(status)) {
56         // Alias the collator
57         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
58         // m_search_ has been created by the base SearchIterator class
59         m_search_        = m_strsrch_->search;
60     }
61 }
62 
StringSearch(const UnicodeString & pattern,const UnicodeString & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)63 StringSearch::StringSearch(const UnicodeString     &pattern,
64                            const UnicodeString     &text,
65                                  RuleBasedCollator *coll,
66                                  BreakIterator     *breakiter,
67                                  UErrorCode        &status) :
68                            SearchIterator(text, breakiter),
69                            m_collator_(),
70                            m_pattern_(pattern)
71 {
72     if (U_FAILURE(status)) {
73         m_strsrch_ = NULL;
74         return;
75     }
76     if (coll == NULL) {
77         status     = U_ILLEGAL_ARGUMENT_ERROR;
78         m_strsrch_ = NULL;
79         return;
80     }
81     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
82                                           m_pattern_.length(),
83                                           m_text_.getBuffer(),
84                                           m_text_.length(), coll->ucollator,
85                                           (UBreakIterator *)breakiter,
86                                           &status);
87     uprv_free(m_search_);
88     m_search_ = NULL;
89 
90     if (U_SUCCESS(status)) {
91         // Alias the collator
92         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
93         // m_search_ has been created by the base SearchIterator class
94         m_search_ = m_strsrch_->search;
95     }
96 }
97 
StringSearch(const UnicodeString & pattern,CharacterIterator & text,const Locale & locale,BreakIterator * breakiter,UErrorCode & status)98 StringSearch::StringSearch(const UnicodeString     &pattern,
99                                  CharacterIterator &text,
100                            const Locale            &locale,
101                                  BreakIterator     *breakiter,
102                                  UErrorCode        &status) :
103                            SearchIterator(text, breakiter),
104                            m_collator_(),
105                            m_pattern_(pattern)
106 {
107     if (U_FAILURE(status)) {
108         m_strsrch_ = NULL;
109         return;
110     }
111     m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
112                               m_text_.getBuffer(), m_text_.length(),
113                               locale.getName(), (UBreakIterator *)breakiter,
114                               &status);
115     uprv_free(m_search_);
116     m_search_ = NULL;
117 
118     if (U_SUCCESS(status)) {
119         // Alias the collator
120         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
121         // m_search_ has been created by the base SearchIterator class
122         m_search_ = m_strsrch_->search;
123     }
124 }
125 
StringSearch(const UnicodeString & pattern,CharacterIterator & text,RuleBasedCollator * coll,BreakIterator * breakiter,UErrorCode & status)126 StringSearch::StringSearch(const UnicodeString     &pattern,
127                                  CharacterIterator &text,
128                                  RuleBasedCollator *coll,
129                                  BreakIterator     *breakiter,
130                                  UErrorCode        &status) :
131                            SearchIterator(text, breakiter),
132                            m_collator_(),
133                            m_pattern_(pattern)
134 {
135     if (U_FAILURE(status)) {
136         m_strsrch_ = NULL;
137         return;
138     }
139     if (coll == NULL) {
140         status     = U_ILLEGAL_ARGUMENT_ERROR;
141         m_strsrch_ = NULL;
142         return;
143     }
144     m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
145                                           m_pattern_.length(),
146                                           m_text_.getBuffer(),
147                                           m_text_.length(), coll->ucollator,
148                                           (UBreakIterator *)breakiter,
149                                           &status);
150     uprv_free(m_search_);
151     m_search_ = NULL;
152 
153     if (U_SUCCESS(status)) {
154         // Alias the collator
155         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
156         // m_search_ has been created by the base SearchIterator class
157         m_search_ = m_strsrch_->search;
158     }
159 }
160 
// Copy constructor. Text, break iterator and pattern are copied from `that`;
// a new underlying search object is opened on the same collator. If `that`
// has no underlying search object, or the open fails, this object is left
// with m_search_ == NULL.
StringSearch::StringSearch(const StringSearch &that) :
                       SearchIterator(that.m_text_, that.m_breakiterator_),
                       m_collator_(),
                       m_pattern_(that.m_pattern_)
{
    // Release the m_search_ that the superclass created.
    uprv_free(m_search_);
    m_search_  = NULL;
    m_strsrch_ = NULL;

    if (that.m_strsrch_ == NULL) {
        // The source was not a good object; there is nothing to copy.
        return;
    }

    // Deep copy: open a fresh search over our own pattern and text buffers.
    UErrorCode status = U_ZERO_ERROR;
    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                          m_pattern_.length(),
                                          m_text_.getBuffer(),
                                          m_text_.length(),
                                          that.m_strsrch_->collator,
                                          (UBreakIterator *)that.m_breakiterator_,
                                          &status);
    if (U_FAILURE(status)) {
        return;
    }

    // Alias the collator held by the freshly opened search object.
    m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
    // From here on m_search_ aliases the search data inside m_strsrch_.
    m_search_ = m_strsrch_->search;
}
193 
~StringSearch()194 StringSearch::~StringSearch()
195 {
196     if (m_strsrch_ != NULL) {
197         usearch_close(m_strsrch_);
198         m_search_ = NULL;
199     }
200 }
201 
202 StringSearch *
clone() const203 StringSearch::clone() const {
204     return new StringSearch(*this);
205 }
206 
207 // operator overloading ---------------------------------------------
// Assignment. Copies text, break iterator and pattern from `that` and
// reopens the underlying search object on `that`'s collator.
//
// Nothing happens when `that` is this object, or when both sides have a
// live search object and already compare equal.
//
// If `that` has no underlying search object, or the reopen fails, this
// object ends up with m_strsrch_ == NULL and m_search_ == NULL (the same
// state the copy constructor produces for a bad source).
StringSearch & StringSearch::operator=(const StringSearch &that)
{
    if (this == &that) {
        return *this;
    }
    // operator!= goes through operator==, which reads m_strsrch_->collator on
    // both sides, so only consult it when both search objects exist.
    if (m_strsrch_ != NULL && that.m_strsrch_ != NULL && !((*this) != that)) {
        return *this;
    }

    UErrorCode status = U_ZERO_ERROR;
    m_text_          = that.m_text_;
    m_breakiterator_ = that.m_breakiterator_;
    m_pattern_       = that.m_pattern_;

    // All of m_search_ in the parent class is linked up with m_strsrch_, so
    // closing the search object also disposes of what m_search_ points at.
    // Without a search object, m_search_ is still the one the base class
    // created (or NULL) and has to be released directly.
    if (m_strsrch_ != NULL) {
        usearch_close(m_strsrch_);
    }
    else {
        uprv_free(m_search_);
    }
    // Never keep pointers to the closed search object around.
    m_strsrch_ = NULL;
    m_search_  = NULL;

    if (that.m_strsrch_ != NULL) {
        // NOTE(review): the break iterator is copied into m_breakiterator_
        // above, but NULL is passed here (the copy constructor passes the
        // source's break iterator). Kept as in the original; confirm intent.
        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
                                              m_pattern_.length(),
                                              m_text_.getBuffer(),
                                              m_text_.length(),
                                              that.m_strsrch_->collator,
                                              NULL, &status);
    }

    if (m_strsrch_ != NULL) {
        // Alias the collator
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
        m_search_ = m_strsrch_->search;
    }
    return *this;
}
232 
operator ==(const SearchIterator & that) const233 UBool StringSearch::operator==(const SearchIterator &that) const
234 {
235     if (this == &that) {
236         return TRUE;
237     }
238     if (SearchIterator::operator ==(that)) {
239         StringSearch &thatsrch = (StringSearch &)that;
240         return (this->m_pattern_ == thatsrch.m_pattern_ &&
241                 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator);
242     }
243     return FALSE;
244 }
245 
246 // public get and set methods ----------------------------------------
247 
setOffset(int32_t position,UErrorCode & status)248 void StringSearch::setOffset(int32_t position, UErrorCode &status)
249 {
250     // status checked in usearch_setOffset
251     usearch_setOffset(m_strsrch_, position, &status);
252 }
253 
getOffset(void) const254 int32_t StringSearch::getOffset(void) const
255 {
256     return usearch_getOffset(m_strsrch_);
257 }
258 
setText(const UnicodeString & text,UErrorCode & status)259 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
260 {
261     if (U_SUCCESS(status)) {
262         m_text_ = text;
263         usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
264     }
265 }
266 
setText(CharacterIterator & text,UErrorCode & status)267 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
268 {
269     if (U_SUCCESS(status)) {
270         text.getText(m_text_);
271         usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
272     }
273 }
274 
getCollator() const275 RuleBasedCollator * StringSearch::getCollator() const
276 {
277     return (RuleBasedCollator *)&m_collator_;
278 }
279 
setCollator(RuleBasedCollator * coll,UErrorCode & status)280 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
281 {
282     if (U_SUCCESS(status)) {
283         usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
284         // Alias the collator
285         m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
286     }
287 }
288 
setPattern(const UnicodeString & pattern,UErrorCode & status)289 void StringSearch::setPattern(const UnicodeString &pattern,
290                                     UErrorCode    &status)
291 {
292     if (U_SUCCESS(status)) {
293         m_pattern_ = pattern;
294         usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
295                            &status);
296     }
297 }
298 
getPattern() const299 const UnicodeString & StringSearch::getPattern() const
300 {
301     return m_pattern_;
302 }
303 
304 // public methods ----------------------------------------------------
305 
reset()306 void StringSearch::reset()
307 {
308     usearch_reset(m_strsrch_);
309 }
310 
safeClone(void) const311 SearchIterator * StringSearch::safeClone(void) const
312 {
313     UErrorCode status = U_ZERO_ERROR;
314     StringSearch *result = new StringSearch(m_pattern_, m_text_,
315                                             (RuleBasedCollator *)&m_collator_,
316                                             m_breakiterator_,
317                                             status);
318     /* test for NULL */
319     if (result == 0) {
320         status = U_MEMORY_ALLOCATION_ERROR;
321         return 0;
322     }
323     result->setOffset(getOffset(), status);
324     result->setMatchStart(m_strsrch_->search->matchedIndex);
325     result->setMatchLength(m_strsrch_->search->matchedLength);
326     if (U_FAILURE(status)) {
327         return NULL;
328     }
329     return result;
330 }
331 
332 // protected method -------------------------------------------------
333 
handleNext(int32_t position,UErrorCode & status)334 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
335 {
336     // values passed here are already in the pre-shift position
337     if (U_SUCCESS(status)) {
338         if (m_strsrch_->pattern.CELength == 0) {
339             m_search_->matchedIndex =
340                                     m_search_->matchedIndex == USEARCH_DONE ?
341                                     getOffset() : m_search_->matchedIndex + 1;
342             m_search_->matchedLength = 0;
343             ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
344                            &status);
345             if (m_search_->matchedIndex == m_search_->textLength) {
346                 m_search_->matchedIndex = USEARCH_DONE;
347             }
348         }
349         else {
350             // looking at usearch.cpp, this part is shifted out to
351             // StringSearch instead of SearchIterator because m_strsrch_ is
352             // not accessible in SearchIterator
353 #if 0
354             if (position + m_strsrch_->pattern.defaultShiftSize
355                 > m_search_->textLength) {
356                 setMatchNotFound();
357                 return USEARCH_DONE;
358             }
359 #endif
360             if (m_search_->matchedLength <= 0) {
361                 // the flipping direction issue has already been handled
362                 // in next()
363                 // for boundary check purposes. this will ensure that the
364                 // next match will not preceed the current offset
365                 // note search->matchedIndex will always be set to something
366                 // in the code
367                 m_search_->matchedIndex = position - 1;
368             }
369 
370             ucol_setOffset(m_strsrch_->textIter, position, &status);
371 
372 #if 0
373             for (;;) {
374                 if (m_search_->isCanonicalMatch) {
375                     // can't use exact here since extra accents are allowed.
376                     usearch_handleNextCanonical(m_strsrch_, &status);
377                 }
378                 else {
379                     usearch_handleNextExact(m_strsrch_, &status);
380                 }
381                 if (U_FAILURE(status)) {
382                     return USEARCH_DONE;
383                 }
384                 if (m_breakiterator_ == NULL
385 #if !UCONFIG_NO_BREAK_ITERATION
386                     ||
387                     m_search_->matchedIndex == USEARCH_DONE ||
388                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
389                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
390                                                   m_search_->matchedLength))
391 #endif
392                 ) {
393                     if (m_search_->matchedIndex == USEARCH_DONE) {
394                         ucol_setOffset(m_strsrch_->textIter,
395                                        m_search_->textLength, &status);
396                     }
397                     else {
398                         ucol_setOffset(m_strsrch_->textIter,
399                                        m_search_->matchedIndex, &status);
400                     }
401                     return m_search_->matchedIndex;
402                 }
403             }
404 #else
405             // if m_strsrch_->breakIter is always the same as m_breakiterator_
406             // then we don't need to check the match boundaries here because
407             // usearch_handleNextXXX will already have done it.
408             if (m_search_->isCanonicalMatch) {
409             	// *could* actually use exact here 'cause no extra accents allowed...
410             	usearch_handleNextCanonical(m_strsrch_, &status);
411             } else {
412             	usearch_handleNextExact(m_strsrch_, &status);
413             }
414 
415             if (U_FAILURE(status)) {
416             	return USEARCH_DONE;
417             }
418 
419             if (m_search_->matchedIndex == USEARCH_DONE) {
420             	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
421             } else {
422             	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
423             }
424 
425             return m_search_->matchedIndex;
426 #endif
427         }
428     }
429     return USEARCH_DONE;
430 }
431 
handlePrev(int32_t position,UErrorCode & status)432 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
433 {
434     // values passed here are already in the pre-shift position
435     if (U_SUCCESS(status)) {
436         if (m_strsrch_->pattern.CELength == 0) {
437             m_search_->matchedIndex =
438                   (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
439                    m_search_->matchedIndex);
440             if (m_search_->matchedIndex == 0) {
441                 setMatchNotFound();
442             }
443             else {
444                 m_search_->matchedIndex --;
445                 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
446                                &status);
447                 m_search_->matchedLength = 0;
448             }
449         }
450         else {
451             // looking at usearch.cpp, this part is shifted out to
452             // StringSearch instead of SearchIterator because m_strsrch_ is
453             // not accessible in SearchIterator
454 #if 0
455             if (!m_search_->isOverlap &&
456                 position - m_strsrch_->pattern.defaultShiftSize < 0) {
457                 setMatchNotFound();
458                 return USEARCH_DONE;
459             }
460 
461             for (;;) {
462                 if (m_search_->isCanonicalMatch) {
463                     // can't use exact here since extra accents are allowed.
464                     usearch_handlePreviousCanonical(m_strsrch_, &status);
465                 }
466                 else {
467                     usearch_handlePreviousExact(m_strsrch_, &status);
468                 }
469                 if (U_FAILURE(status)) {
470                     return USEARCH_DONE;
471                 }
472                 if (m_breakiterator_ == NULL
473 #if !UCONFIG_NO_BREAK_ITERATION
474                     ||
475                     m_search_->matchedIndex == USEARCH_DONE ||
476                     (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
477                      m_breakiterator_->isBoundary(m_search_->matchedIndex +
478                                                   m_search_->matchedLength))
479 #endif
480                 ) {
481                     return m_search_->matchedIndex;
482                 }
483             }
484 #else
485             ucol_setOffset(m_strsrch_->textIter, position, &status);
486 
487             if (m_search_->isCanonicalMatch) {
488             	// *could* use exact match here since extra accents *not* allowed!
489             	usearch_handlePreviousCanonical(m_strsrch_, &status);
490             } else {
491             	usearch_handlePreviousExact(m_strsrch_, &status);
492             }
493 
494             if (U_FAILURE(status)) {
495             	return USEARCH_DONE;
496             }
497 
498             return m_search_->matchedIndex;
499 #endif
500         }
501 
502         return m_search_->matchedIndex;
503     }
504     return USEARCH_DONE;
505 }
506 
507 U_NAMESPACE_END
508 
509 #endif /* #if !UCONFIG_NO_COLLATION */
510