1 // Copyright 2011 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stdint.h>
6
7 #include "base/i18n/string_search.h"
8
9 #include "base/check.h"
10 #include "base/check_op.h"
11 #include "third_party/icu/source/i18n/unicode/usearch.h"
12
13 namespace base {
14 namespace i18n {
15
FixedPatternStringSearch(const std::u16string & find_this,bool case_sensitive)16 FixedPatternStringSearch::FixedPatternStringSearch(
17 const std::u16string& find_this,
18 bool case_sensitive)
19 : find_this_(find_this) {
20 // usearch_open requires a valid string argument to be searched, even if we
21 // want to set it by usearch_setText afterwards. So, supplying a dummy text.
22 const std::u16string& dummy = find_this_;
23
24 UErrorCode status = U_ZERO_ERROR;
25 search_ = usearch_open(find_this_.data(), find_this_.size(), dummy.data(),
26 dummy.size(), uloc_getDefault(),
27 nullptr, // breakiter
28 &status);
29 if (U_SUCCESS(status)) {
30 // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
31 // Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary
32 // differences. Set comparison level to UCOL_TERTIARY to include all
33 // comparison differences.
34 // Diacritical differences on the same base letter represent a
35 // secondary difference.
36 // Uppercase and lowercase versions of the same character represents a
37 // tertiary difference.
38 UCollator* collator = usearch_getCollator(search_);
39 ucol_setStrength(collator, case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY);
40 usearch_reset(search_);
41 }
42 }
43
~FixedPatternStringSearch()44 FixedPatternStringSearch::~FixedPatternStringSearch() {
45 if (search_)
46 usearch_close(search_.ExtractAsDangling());
47 }
48
Search(const std::u16string & in_this,size_t * match_index,size_t * match_length,bool forward_search)49 bool FixedPatternStringSearch::Search(const std::u16string& in_this,
50 size_t* match_index,
51 size_t* match_length,
52 bool forward_search) {
53 UErrorCode status = U_ZERO_ERROR;
54 usearch_setText(search_, in_this.data(), in_this.size(), &status);
55
56 // Default to basic substring search if usearch fails. According to
57 // http://icu-project.org/apiref/icu4c/usearch_8h.html, usearch_open will fail
58 // if either |find_this| or |in_this| are empty. In either case basic
59 // substring search will give the correct return value.
60 if (!U_SUCCESS(status)) {
61 size_t index = in_this.find(find_this_);
62 if (index == std::u16string::npos)
63 return false;
64 if (match_index)
65 *match_index = index;
66 if (match_length)
67 *match_length = find_this_.size();
68 return true;
69 }
70
71 int32_t index = forward_search ? usearch_first(search_, &status)
72 : usearch_last(search_, &status);
73 if (!U_SUCCESS(status) || index == USEARCH_DONE)
74 return false;
75 if (match_index)
76 *match_index = static_cast<size_t>(index);
77 if (match_length)
78 *match_length = static_cast<size_t>(usearch_getMatchedLength(search_));
79 return true;
80 }
81
82 FixedPatternStringSearchIgnoringCaseAndAccents::
FixedPatternStringSearchIgnoringCaseAndAccents(const std::u16string & find_this)83 FixedPatternStringSearchIgnoringCaseAndAccents(
84 const std::u16string& find_this)
85 : base_search_(find_this, /*case_sensitive=*/false) {}
86
Search(const std::u16string & in_this,size_t * match_index,size_t * match_length)87 bool FixedPatternStringSearchIgnoringCaseAndAccents::Search(
88 const std::u16string& in_this,
89 size_t* match_index,
90 size_t* match_length) {
91 return base_search_.Search(in_this, match_index, match_length,
92 /*forward_search=*/true);
93 }
94
StringSearchIgnoringCaseAndAccents(const std::u16string & find_this,const std::u16string & in_this,size_t * match_index,size_t * match_length)95 bool StringSearchIgnoringCaseAndAccents(const std::u16string& find_this,
96 const std::u16string& in_this,
97 size_t* match_index,
98 size_t* match_length) {
99 return FixedPatternStringSearchIgnoringCaseAndAccents(find_this).Search(
100 in_this, match_index, match_length);
101 }
102
StringSearch(const std::u16string & find_this,const std::u16string & in_this,size_t * match_index,size_t * match_length,bool case_sensitive,bool forward_search)103 bool StringSearch(const std::u16string& find_this,
104 const std::u16string& in_this,
105 size_t* match_index,
106 size_t* match_length,
107 bool case_sensitive,
108 bool forward_search) {
109 return FixedPatternStringSearch(find_this, case_sensitive)
110 .Search(in_this, match_index, match_length, forward_search);
111 }
112
RepeatingStringSearch(const std::u16string & find_this,const std::u16string & in_this,bool case_sensitive)113 RepeatingStringSearch::RepeatingStringSearch(const std::u16string& find_this,
114 const std::u16string& in_this,
115 bool case_sensitive)
116 : find_this_(find_this), in_this_(in_this) {
117 std::string locale = uloc_getDefault();
118 UErrorCode status = U_ZERO_ERROR;
119 search_ = usearch_open(find_this_.data(), find_this_.size(), in_this_.data(),
120 in_this_.size(), locale.data(), /*breakiter=*/nullptr,
121 &status);
122 DCHECK(U_SUCCESS(status));
123 if (U_SUCCESS(status)) {
124 // http://icu-project.org/apiref/icu4c40/ucol_8h.html#6a967f36248b0a1bc7654f538ee8ba96
125 // Set comparison level to UCOL_PRIMARY to ignore secondary and tertiary
126 // differences. Set comparison level to UCOL_TERTIARY to include all
127 // comparison differences.
128 // Diacritical differences on the same base letter represent a
129 // secondary difference.
130 // Uppercase and lowercase versions of the same character represents a
131 // tertiary difference.
132 UCollator* collator = usearch_getCollator(search_);
133 ucol_setStrength(collator, case_sensitive ? UCOL_TERTIARY : UCOL_PRIMARY);
134 usearch_reset(search_);
135 }
136 }
137
~RepeatingStringSearch()138 RepeatingStringSearch::~RepeatingStringSearch() {
139 if (search_)
140 usearch_close(search_.ExtractAsDangling());
141 }
142
NextMatchResult(int & match_index,int & match_length)143 bool RepeatingStringSearch::NextMatchResult(int& match_index,
144 int& match_length) {
145 UErrorCode status = U_ZERO_ERROR;
146 const int match_start = usearch_next(search_, &status);
147 if (U_FAILURE(status) || match_start == USEARCH_DONE)
148 return false;
149 DCHECK(U_SUCCESS(status));
150 match_index = match_start;
151 match_length = usearch_getMatchedLength(search_);
152 return true;
153 }
154
155 } // namespace i18n
156 } // namespace base
157