• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "base/i18n/break_iterator.h"
6 
7 #include "base/string_piece.h"
8 #include "base/string_util.h"
9 #include "base/utf_string_conversions.h"
10 #include "testing/gtest/include/gtest/gtest.h"
11 
// BREAK_WORD over an empty string: initialization succeeds but there is
// nothing to iterate, so every Advance() fails and no token is a word.
TEST(BreakIteratorTest, BreakWordEmpty) {
  string16 no_text;
  base::BreakIterator word_iter(&no_text, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(word_iter.Init());

  // The very first advance already hits the end.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
}
21 
// BREAK_WORD over " foo bar! \npouet boom": checks each token in order.
// Words are expected to report IsWord() == true; spaces, the "!" and the
// newline are expected to come back as separate non-word tokens.
TEST(BreakIteratorTest, BreakWord) {
  const string16 single_space = UTF8ToUTF16(" ");
  const string16 text = UTF8ToUTF16(" foo bar! \npouet boom");
  base::BreakIterator word_iter(&text, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(word_iter.Init());

  // Leading space.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(single_space, word_iter.GetString());

  // "foo"
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("foo"), word_iter.GetString());

  // Space between "foo" and "bar".
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(single_space, word_iter.GetString());

  // "bar"
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("bar"), word_iter.GetString());

  // Punctuation is its own non-word token.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("!"), word_iter.GetString());

  // Space after "!".
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(single_space, word_iter.GetString());

  // Newline is reported separately from the preceding space.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("\n"), word_iter.GetString());

  // "pouet"
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("pouet"), word_iter.GetString());

  // Space between "pouet" and "boom".
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(single_space, word_iter.GetString());

  // "boom"
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("boom"), word_iter.GetString());

  // End of input.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
}
62 
// BREAK_WORD over non-ASCII text: a 10-letter and a 5-letter Greek word
// separated by a single U+0020 space.
TEST(BreakIteratorTest, BreakWide16) {
  const string16 greek_text = WideToUTF16(
      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
      L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2");
  // The space sits at index 10, so the second word starts at index 11.
  const string16 first_word = greek_text.substr(0, 10);
  const string16 second_word = greek_text.substr(11, 5);

  base::BreakIterator word_iter(&greek_text, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(word_iter.Init());

  // First Greek word.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(first_word, word_iter.GetString());

  // Separating space.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16(" "), word_iter.GetString());

  // Second Greek word.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(second_word, word_iter.GetString());

  // End of input.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
}
86 
// BREAK_WORD with a character outside the BMP: the two UTF-16 code units
// encoding it are expected to be returned together as a single word.
TEST(BreakIteratorTest, BreakWide32) {
  // UTF-8 encoding of U+1D49C MATHEMATICAL SCRIPT CAPITAL A.
  const char* const script_capital_a = "\xF0\x9D\x92\x9C";
  const string16 text =
      UTF8ToUTF16(StringPrintf("%s a", script_capital_a));
  // The first two code units form the surrogate pair.
  const string16 surrogate_pair_word = text.substr(0, 2);

  base::BreakIterator word_iter(&text, base::BreakIterator::BREAK_WORD);
  ASSERT_TRUE(word_iter.Init());

  // The supplementary-plane character as one word.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(surrogate_pair_word, word_iter.GetString());

  // Separating space.
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16(" "), word_iter.GetString());

  // "a"
  EXPECT_TRUE(word_iter.Advance());
  EXPECT_TRUE(word_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("a"), word_iter.GetString());

  // End of input.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(word_iter.Advance());
  EXPECT_FALSE(word_iter.IsWord());
}
110 
// BREAK_SPACE over an empty string: initialization succeeds but every
// Advance() fails and IsWord() stays false.
TEST(BreakIteratorTest, BreakSpaceEmpty) {
  string16 no_text;
  base::BreakIterator space_iter(&no_text, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(space_iter.Init());

  // The very first advance already hits the end.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
}
120 
// BREAK_SPACE over " foo bar! \npouet boom": each expected token runs up to
// and including its trailing whitespace, and IsWord() is expected to be
// false for every token in this mode.
TEST(BreakIteratorTest, BreakSpace) {
  const string16 text = UTF8ToUTF16(" foo bar! \npouet boom");
  base::BreakIterator space_iter(&text, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(space_iter.Init());

  // Leading space on its own.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16(" "), space_iter.GetString());

  // "foo" plus the space that follows it.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("foo "), space_iter.GetString());

  // "bar!" plus the trailing space and newline.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("bar! \n"), space_iter.GetString());

  // "pouet" plus the space that follows it.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("pouet "), space_iter.GetString());

  // Final token has no trailing whitespace.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("boom"), space_iter.GetString());

  // End of input.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
}
145 
// Same walk as the BreakSpace test, but the input ends with a space: the
// last token is expected to include that trailing space ("boom ").
TEST(BreakIteratorTest, BreakSpaceSP) {
  const string16 text = UTF8ToUTF16(" foo bar! \npouet boom ");
  base::BreakIterator space_iter(&text, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(space_iter.Init());

  // Leading space on its own.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16(" "), space_iter.GetString());

  // "foo" plus the space that follows it.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("foo "), space_iter.GetString());

  // "bar!" plus the trailing space and newline.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("bar! \n"), space_iter.GetString());

  // "pouet" plus the space that follows it.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("pouet "), space_iter.GetString());

  // Final token keeps the trailing space of the input.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("boom "), space_iter.GetString());

  // End of input.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
}
170 
// BREAK_SPACE over two Greek words separated by U+0020: the first expected
// token is the first word together with the space, the second is the
// remaining word.
TEST(BreakIteratorTest, BreakSpacekWide16) {
  const string16 greek_text = WideToUTF16(
      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
      L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2");
  // 10 letters plus the space at index 10, then the 5-letter word.
  const string16 first_chunk = greek_text.substr(0, 11);
  const string16 second_chunk = greek_text.substr(11, 5);

  base::BreakIterator space_iter(&greek_text,
                                 base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(space_iter.Init());

  // First word including its trailing space.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(first_chunk, space_iter.GetString());

  // Second word.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(second_chunk, space_iter.GetString());

  // End of input.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
}
191 
// BREAK_SPACE with a character outside the BMP: the first expected token is
// the surrogate pair plus the following space (three UTF-16 code units).
TEST(BreakIteratorTest, BreakSpaceWide32) {
  // UTF-8 encoding of U+1D49C MATHEMATICAL SCRIPT CAPITAL A.
  const char* const script_capital_a = "\xF0\x9D\x92\x9C";
  const string16 text =
      UTF8ToUTF16(StringPrintf("%s a", script_capital_a));
  const string16 surrogate_pair_and_space = text.substr(0, 3);

  base::BreakIterator space_iter(&text, base::BreakIterator::BREAK_SPACE);
  ASSERT_TRUE(space_iter.Init());

  // Supplementary-plane character together with its trailing space.
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(surrogate_pair_and_space, space_iter.GetString());

  // "a"
  EXPECT_TRUE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("a"), space_iter.GetString());

  // End of input.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(space_iter.Advance());
  EXPECT_FALSE(space_iter.IsWord());
}
212 
// BREAK_NEWLINE over an empty string: initialization succeeds but every
// Advance() fails and IsWord() stays false.
TEST(BreakIteratorTest, BreakLineEmpty) {
  string16 no_text;
  base::BreakIterator line_iter(&no_text, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(line_iter.Init());

  // The very first advance already hits the end.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
}
222 
// BREAK_NEWLINE over "\nfoo bar!\n\npouet boom": each expected token is one
// line including its terminating newline (the last line has none), and
// IsWord() is expected to be false for every token in this mode.
TEST(BreakIteratorTest, BreakLine) {
  const string16 newline = UTF8ToUTF16("\n");
  const string16 text = UTF8ToUTF16("\nfoo bar!\n\npouet boom");
  base::BreakIterator line_iter(&text, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(line_iter.Init());

  // Empty first line: just the newline.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(newline, line_iter.GetString());

  // "foo bar!" with its newline.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), line_iter.GetString());

  // Empty line in the middle.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(newline, line_iter.GetString());

  // Last line, not newline-terminated.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("pouet boom"), line_iter.GetString());

  // End of input.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
}
245 
// Same walk as the BreakLine test, but the input ends with a newline: the
// last token is expected to include it ("pouet boom\n") and no extra empty
// token is expected afterwards.
TEST(BreakIteratorTest, BreakLineNL) {
  const string16 newline = UTF8ToUTF16("\n");
  const string16 text = UTF8ToUTF16("\nfoo bar!\n\npouet boom\n");
  base::BreakIterator line_iter(&text, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(line_iter.Init());

  // Empty first line: just the newline.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(newline, line_iter.GetString());

  // "foo bar!" with its newline.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), line_iter.GetString());

  // Empty line in the middle.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(newline, line_iter.GetString());

  // Last line, newline-terminated this time.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("pouet boom\n"), line_iter.GetString());

  // End of input.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
}
268 
// BREAK_NEWLINE over two Greek words separated by U+000A: the first expected
// token is the first word together with the newline, the second is the
// remaining word.
TEST(BreakIteratorTest, BreakLineWide16) {
  const string16 greek_text = WideToUTF16(
      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
      L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2");
  // 10 letters plus the newline at index 10, then the 5-letter word.
  const string16 first_line = greek_text.substr(0, 11);
  const string16 second_line = greek_text.substr(11, 5);

  base::BreakIterator line_iter(&greek_text,
                                base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(line_iter.Init());

  // First line including its newline.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(first_line, line_iter.GetString());

  // Second line.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(second_line, line_iter.GetString());

  // End of input.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
}
289 
// BREAK_NEWLINE with a character outside the BMP: the first expected token
// is the surrogate pair plus the newline (three UTF-16 code units).
TEST(BreakIteratorTest, BreakLineWide32) {
  // UTF-8 encoding of U+1D49C MATHEMATICAL SCRIPT CAPITAL A.
  const char* const script_capital_a = "\xF0\x9D\x92\x9C";
  const string16 text =
      UTF8ToUTF16(StringPrintf("%s\na", script_capital_a));
  const string16 surrogate_pair_line = text.substr(0, 3);

  base::BreakIterator line_iter(&text, base::BreakIterator::BREAK_NEWLINE);
  ASSERT_TRUE(line_iter.Init());

  // Supplementary-plane character together with its newline.
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(surrogate_pair_line, line_iter.GetString());

  // "a"
  EXPECT_TRUE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
  EXPECT_EQ(UTF8ToUTF16("a"), line_iter.GetString());

  // End of input.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());

  // Test unexpected advance after end.
  EXPECT_FALSE(line_iter.Advance());
  EXPECT_FALSE(line_iter.IsWord());
}
309