1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/break_iterator.h"
6
7 #include "base/string_piece.h"
8 #include "base/string_util.h"
9 #include "base/utf_string_conversions.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11
// With BREAK_WORD on an empty string, Advance() is expected to fail right
// away and IsWord() to stay false, including on a repeated call.
TEST(BreakIteratorTest, BreakWordEmpty) {
  string16 no_text;
  base::BreakIterator iter(&no_text, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(iter.Init());

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
21
// Walks an ASCII sentence in BREAK_WORD mode and checks, segment by segment,
// both the text returned by GetString() and the IsWord() classification.
TEST(BreakIteratorTest, BreakWord) {
  // Expected segments, in the order the iterator should produce them.
  struct Segment {
    const char* text;
    bool is_word;
  };
  const Segment kExpected[] = {
    { " ", false },
    { "foo", true },
    { " ", false },
    { "bar", true },
    { "!", false },
    { " ", false },
    { "\n", false },
    { "pouet", true },
    { " ", false },
    { "boom", true },
  };

  string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
  base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_EQ(kExpected[i].is_word, iter.IsWord());
    EXPECT_EQ(UTF8ToUTF16(kExpected[i].text), iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
62
// BREAK_WORD over non-ASCII text that fits in single UTF-16 code units: two
// Greek words separated by a space.
TEST(BreakIteratorTest, BreakWide16) {
  const string16 str(WideToUTF16(
      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
      L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));

  // Expected segments: the first word (10 code units), the separating space,
  // then the second word (5 code units starting at offset 11).
  struct Segment {
    string16 text;
    bool is_word;
  };
  const Segment kExpected[] = {
    { str.substr(0, 10), true },
    { UTF8ToUTF16(" "), false },
    { str.substr(11, 5), true },
  };

  base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_EQ(kExpected[i].is_word, iter.IsWord());
    EXPECT_EQ(kExpected[i].text, iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
86
// BREAK_WORD over text containing a character outside the BMP, which takes
// two UTF-16 code units; the test expects it to come back as a single word.
TEST(BreakIteratorTest, BreakWide32) {
  // U+1D49C MATHEMATICAL SCRIPT CAPITAL A, encoded as UTF-8.
  const char* very_wide_char = "\xF0\x9D\x92\x9C";
  const string16 str(
      UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));

  // Expected segments: the two-code-unit character, the space, then "a".
  struct Segment {
    string16 text;
    bool is_word;
  };
  const Segment kExpected[] = {
    { str.substr(0, 2), true },
    { UTF8ToUTF16(" "), false },
    { UTF8ToUTF16("a"), true },
  };

  base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_EQ(kExpected[i].is_word, iter.IsWord());
    EXPECT_EQ(kExpected[i].text, iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
110
// With BREAK_SPACE on an empty string, Advance() is expected to fail right
// away and IsWord() to stay false, including on a repeated call.
TEST(BreakIteratorTest, BreakSpaceEmpty) {
  string16 no_text;
  base::BreakIterator iter(&no_text, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(iter.Init());

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
120
// Walks an ASCII sentence in BREAK_SPACE mode. Each expected segment ends
// with the whitespace that follows it, and none is reported as a word.
TEST(BreakIteratorTest, BreakSpace) {
  // Expected segments, in the order the iterator should produce them.
  const char* const kExpected[] = {
    " ",
    "foo ",
    "bar! \n",
    "pouet ",
    "boom",
  };

  string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(UTF8ToUTF16(kExpected[i]), iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
145
// Same as BreakSpace, but the input ends with a space; the test expects that
// space to be part of the last segment rather than a segment of its own.
TEST(BreakIteratorTest, BreakSpaceSP) {
  // Expected segments, in the order the iterator should produce them.
  const char* const kExpected[] = {
    " ",
    "foo ",
    "bar! \n",
    "pouet ",
    "boom ",
  };

  string16 str(UTF8ToUTF16(" foo bar! \npouet boom "));
  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(UTF8ToUTF16(kExpected[i]), iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
170
// BREAK_SPACE over two Greek words separated by a space. The space is
// expected to stay attached to the first segment.
// NOTE(review): "Spacek" in the test name looks like a typo for "Space"; the
// name is left as-is so existing test filters keep matching.
TEST(BreakIteratorTest, BreakSpacekWide16) {
  const string16 str(WideToUTF16(
      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
      L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));

  // First segment: 10 letters plus the space; second: the remaining 5.
  const string16 kExpected[] = {
    str.substr(0, 11),
    str.substr(11, 5),
  };

  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(kExpected[i], iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
191
// BREAK_SPACE over text containing a character outside the BMP followed by
// a space and an ASCII letter.
TEST(BreakIteratorTest, BreakSpaceWide32) {
  // U+1D49C MATHEMATICAL SCRIPT CAPITAL A, encoded as UTF-8.
  const char* very_wide_char = "\xF0\x9D\x92\x9C";
  const string16 str(
      UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));

  // First segment: the two-code-unit character plus the space; then "a".
  const string16 kExpected[] = {
    str.substr(0, 3),
    UTF8ToUTF16("a"),
  };

  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(kExpected[i], iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
212
// With BREAK_NEWLINE on an empty string, Advance() is expected to fail right
// away and IsWord() to stay false, including on a repeated call.
TEST(BreakIteratorTest, BreakLineEmpty) {
  string16 no_text;
  base::BreakIterator iter(&no_text, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(iter.Init());

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
222
// Walks multi-line ASCII text in BREAK_NEWLINE mode. Each expected segment
// ends with its newline (when it has one), and none is reported as a word.
TEST(BreakIteratorTest, BreakLine) {
  // Expected segments, in the order the iterator should produce them.
  const char* const kExpected[] = {
    "\n",
    "foo bar!\n",
    "\n",
    "pouet boom",
  };

  string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom"));
  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(UTF8ToUTF16(kExpected[i]), iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
245
// Same as BreakLine, but the input ends with a newline; the test expects it
// to be part of the last segment rather than a segment of its own.
TEST(BreakIteratorTest, BreakLineNL) {
  // Expected segments, in the order the iterator should produce them.
  const char* const kExpected[] = {
    "\n",
    "foo bar!\n",
    "\n",
    "pouet boom\n",
  };

  string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom\n"));
  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(UTF8ToUTF16(kExpected[i]), iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
268
// BREAK_NEWLINE over two Greek words separated by a newline. The newline is
// expected to stay attached to the first line.
TEST(BreakIteratorTest, BreakLineWide16) {
  const string16 str(WideToUTF16(
      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
      L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2"));

  // First line: 10 letters plus the newline; second: the remaining 5.
  const string16 kExpected[] = {
    str.substr(0, 11),
    str.substr(11, 5),
  };

  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(kExpected[i], iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
289
// BREAK_NEWLINE over text containing a character outside the BMP followed
// by a newline and an ASCII letter.
TEST(BreakIteratorTest, BreakLineWide32) {
  // U+1D49C MATHEMATICAL SCRIPT CAPITAL A, encoded as UTF-8.
  const char* very_wide_char = "\xF0\x9D\x92\x9C";
  const string16 str(
      UTF8ToUTF16(StringPrintf("%s\na", very_wide_char)));

  // First line: the two-code-unit character plus the newline; then "a".
  const string16 kExpected[] = {
    str.substr(0, 3),
    UTF8ToUTF16("a"),
  };

  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(iter.Init());

  for (size_t i = 0; i < sizeof(kExpected) / sizeof(kExpected[0]); ++i) {
    EXPECT_TRUE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
    EXPECT_EQ(kExpected[i], iter.GetString());
  }

  // The second pass tests an unexpected advance after the end.
  for (int pass = 0; pass < 2; ++pass) {
    EXPECT_FALSE(iter.Advance());
    EXPECT_FALSE(iter.IsWord());
  }
}
309