• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <stdint.h>
6 
7 #include "base/i18n/string_search.h"
8 
9 #include "base/check.h"
10 #include "base/check_op.h"
11 #include "third_party/icu/source/i18n/unicode/usearch.h"
12 
13 namespace base {
14 namespace i18n {
15 
FixedPatternStringSearch(const std::u16string & find_this,bool case_sensitive)16 FixedPatternStringSearch::FixedPatternStringSearch(
17     const std::u16string& find_this,
18     bool case_sensitive)
19     : find_this_(find_this) {
20   // usearch_open requires a valid string argument to be searched, even if we
21   // want to set it by usearch_setText afterwards. So, supplying a dummy text.
22   const std::u16string& dummy = find_this_;
23 
24   UErrorCode status = U_ZERO_ERROR;
25   search_ = usearch_open(find_this_.data(), find_this_.size(), dummy.data(),
26                          dummy.size(), uloc_getDefault(),
27                          nullptr,  // breakiter
28                          &status);
29   if (U_SUCCESS(status)) {
30     // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
31     // Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary
32     // differences. Set comparison level to UCOL_TERTIARY to include all
33     // comparison differences.
34     // Diacritical differences on the same base letter represent a
35     // secondary difference.
36     // Uppercase and lowercase versions of the same character represents a
37     // tertiary difference.
38     UCollator* collator = usearch_getCollator(search_);
39     ucol_setStrength(collator, case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY);
40     usearch_reset(search_);
41   }
42 }
43 
~FixedPatternStringSearch()44 FixedPatternStringSearch::~FixedPatternStringSearch() {
45   if (search_)
46     usearch_close(search_.ExtractAsDangling());
47 }
48 
Search(const std::u16string & in_this,size_t * match_index,size_t * match_length,bool forward_search)49 bool FixedPatternStringSearch::Search(const std::u16string& in_this,
50                                       size_t* match_index,
51                                       size_t* match_length,
52                                       bool forward_search) {
53   UErrorCode status = U_ZERO_ERROR;
54   usearch_setText(search_, in_this.data(), in_this.size(), &status);
55 
56   // Default to basic substring search if usearch fails. According to
57   // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
58   // if either |find_this| or |in_this| are empty. In either case basic
59   // substring search will give the correct return value.
60   if (!U_SUCCESS(status)) {
61     size_t index = in_this.find(find_this_);
62     if (index == std::u16string::npos)
63       return false;
64     if (match_index)
65       *match_index = index;
66     if (match_length)
67       *match_length = find_this_.size();
68     return true;
69   }
70 
71   int32_t index = forward_search ? usearch_first(search_, &status)
72                                  : usearch_last(search_, &status);
73   if (!U_SUCCESS(status) || index == USEARCH_DONE)
74     return false;
75   if (match_index)
76     *match_index = static_cast<size_t>(index);
77   if (match_length)
78     *match_length = static_cast<size_t>(usearch_getMatchedLength(search_));
79   return true;
80 }
81 
82 FixedPatternStringSearchIgnoringCaseAndAccents::
FixedPatternStringSearchIgnoringCaseAndAccents(const std::u16string & find_this)83     FixedPatternStringSearchIgnoringCaseAndAccents(
84         const std::u16string& find_this)
85     : base_search_(find_this, /*case_sensitive=*/false) {}
86 
Search(const std::u16string & in_this,size_t * match_index,size_t * match_length)87 bool FixedPatternStringSearchIgnoringCaseAndAccents::Search(
88     const std::u16string& in_this,
89     size_t* match_index,
90     size_t* match_length) {
91   return base_search_.Search(in_this, match_index, match_length,
92                              /*forward_search=*/true);
93 }
94 
StringSearchIgnoringCaseAndAccents(const std::u16string & find_this,const std::u16string & in_this,size_t * match_index,size_t * match_length)95 bool StringSearchIgnoringCaseAndAccents(const std::u16string& find_this,
96                                         const std::u16string& in_this,
97                                         size_t* match_index,
98                                         size_t* match_length) {
99   return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search(
100       in_this, match_index, match_length);
101 }
102 
StringSearch(const std::u16string & find_this,const std::u16string & in_this,size_t * match_index,size_t * match_length,bool case_sensitive,bool forward_search)103 bool StringSearch(const std::u16string& find_this,
104                   const std::u16string& in_this,
105                   size_t* match_index,
106                   size_t* match_length,
107                   bool case_sensitive,
108                   bool forward_search) {
109   return FixedPatternStringSearch(find_this, case_sensitive)
110       .Search(in_this, match_index, match_length, forward_search);
111 }
112 
RepeatingStringSearch(const std::u16string & find_this,const std::u16string & in_this,bool case_sensitive)113 RepeatingStringSearch::RepeatingStringSearch(const std::u16string& find_this,
114                                              const std::u16string& in_this,
115                                              bool case_sensitive)
116     : find_this_(find_this), in_this_(in_this) {
117   std::string locale = uloc_getDefault();
118   UErrorCode status = U_ZERO_ERROR;
119   search_ = usearch_open(find_this_.data(), find_this_.size(), in_this_.data(),
120                          in_this_.size(), locale.data(), /*breakiter=*/nullptr,
121                          &status);
122   DCHECK(U_SUCCESS(status));
123   if (U_SUCCESS(status)) {
124     // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
125     // Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary
126     // differences. Set comparison level to UCOL_TERTIARY to include all
127     // comparison differences.
128     // Diacritical differences on the same base letter represent a
129     // secondary difference.
130     // Uppercase and lowercase versions of the same character represents a
131     // tertiary difference.
132     UCollator* collator = usearch_getCollator(search_);
133     ucol_setStrength(collator, case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY);
134     usearch_reset(search_);
135   }
136 }
137 
~RepeatingStringSearch()138 RepeatingStringSearch::~RepeatingStringSearch() {
139   if (search_)
140     usearch_close(search_.ExtractAsDangling());
141 }
142 
NextMatchResult(int & match_index,int & match_length)143 bool RepeatingStringSearch::NextMatchResult(int& match_index,
144                                             int& match_length) {
145   UErrorCode status = U_ZERO_ERROR;
146   const int match_start = usearch_next(search_, &status);
147   if (U_FAILURE(status) || match_start == USEARCH_DONE)
148     return false;
149   DCHECK(U_SUCCESS(status));
150   match_index = match_start;
151   match_length = usearch_getMatchedLength(search_);
152   return true;
153 }
154 
155 }  // namespace i18n
156 }  // namespace base
157