1 /*
2 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
3 * Copyright (C) 2007-2009 Torch Mobile, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
19 *
20 */
21
22 #include "config.h"
23 #include "TextBreakIterator.h"
24
25 #include "PlatformString.h"
26 #include <wtf/StdLibExtras.h>
27 #include <wtf/unicode/Unicode.h>
28
29 using namespace std;
30 using namespace WTF::Unicode;
31
32 namespace WebCore {
33
34 // Hack, not entirely correct
isCharStop(UChar c)35 static inline bool isCharStop(UChar c)
36 {
37 CharCategory charCategory = category(c);
38 return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00));
39 }
40
isLineStop(UChar c)41 static inline bool isLineStop(UChar c)
42 {
43 return category(c) != Separator_Line;
44 }
45
isSentenceStop(UChar c)46 static inline bool isSentenceStop(UChar c)
47 {
48 return isPunct(c);
49 }
50
51 class TextBreakIterator {
52 public:
reset(const UChar * str,int len)53 void reset(const UChar* str, int len)
54 {
55 string = str;
56 length = len;
57 currentPos = 0;
58 }
first()59 int first()
60 {
61 currentPos = 0;
62 return currentPos;
63 }
last()64 int last()
65 {
66 currentPos = length;
67 return currentPos;
68 }
69 virtual int next() = 0;
70 virtual int previous() = 0;
following(int position)71 int following(int position)
72 {
73 currentPos = position;
74 return next();
75 }
preceding(int position)76 int preceding(int position)
77 {
78 currentPos = position;
79 return previous();
80 }
81
82 int currentPos;
83 const UChar* string;
84 int length;
85 };
86
87 struct WordBreakIterator: TextBreakIterator {
88 virtual int next();
89 virtual int previous();
90 };
91
92 struct CharBreakIterator: TextBreakIterator {
93 virtual int next();
94 virtual int previous();
95 };
96
97 struct LineBreakIterator: TextBreakIterator {
98 virtual int next();
99 virtual int previous();
100 };
101
102 struct SentenceBreakIterator : TextBreakIterator {
103 virtual int next();
104 virtual int previous();
105 };
106
next()107 int WordBreakIterator::next()
108 {
109 if (currentPos == length) {
110 currentPos = -1;
111 return currentPos;
112 }
113 bool haveSpace = false;
114 while (currentPos < length) {
115 if (haveSpace && !isSpace(string[currentPos]))
116 break;
117 if (isSpace(string[currentPos]))
118 haveSpace = true;
119 ++currentPos;
120 }
121 return currentPos;
122 }
123
previous()124 int WordBreakIterator::previous()
125 {
126 if (!currentPos) {
127 currentPos = -1;
128 return currentPos;
129 }
130 bool haveSpace = false;
131 while (currentPos > 0) {
132 if (haveSpace && !isSpace(string[currentPos]))
133 break;
134 if (isSpace(string[currentPos]))
135 haveSpace = true;
136 --currentPos;
137 }
138 return currentPos;
139 }
140
next()141 int CharBreakIterator::next()
142 {
143 if (currentPos >= length)
144 return -1;
145 ++currentPos;
146 while (currentPos < length && !isCharStop(string[currentPos]))
147 ++currentPos;
148 return currentPos;
149 }
150
previous()151 int CharBreakIterator::previous()
152 {
153 if (currentPos <= 0)
154 return -1;
155 if (currentPos > length)
156 currentPos = length;
157 --currentPos;
158 while (currentPos > 0 && !isCharStop(string[currentPos]))
159 --currentPos;
160 return currentPos;
161 }
162
next()163 int LineBreakIterator::next()
164 {
165 if (currentPos == length) {
166 currentPos = -1;
167 return currentPos;
168 }
169 bool haveSpace = false;
170 while (currentPos < length) {
171 if (haveSpace && !isLineStop(string[currentPos]))
172 break;
173 if (isLineStop(string[currentPos]))
174 haveSpace = true;
175 ++currentPos;
176 }
177 return currentPos;
178 }
179
previous()180 int LineBreakIterator::previous()
181 {
182 if (!currentPos) {
183 currentPos = -1;
184 return currentPos;
185 }
186 bool haveSpace = false;
187 while (currentPos > 0) {
188 if (haveSpace && !isLineStop(string[currentPos]))
189 break;
190 if (isLineStop(string[currentPos]))
191 haveSpace = true;
192 --currentPos;
193 }
194 return currentPos;
195 }
196
next()197 int SentenceBreakIterator::next()
198 {
199 if (currentPos == length) {
200 currentPos = -1;
201 return currentPos;
202 }
203 bool haveSpace = false;
204 while (currentPos < length) {
205 if (haveSpace && !isSentenceStop(string[currentPos]))
206 break;
207 if (isSentenceStop(string[currentPos]))
208 haveSpace = true;
209 ++currentPos;
210 }
211 return currentPos;
212 }
213
previous()214 int SentenceBreakIterator::previous()
215 {
216 if (!currentPos) {
217 currentPos = -1;
218 return currentPos;
219 }
220 bool haveSpace = false;
221 while (currentPos > 0) {
222 if (haveSpace && !isSentenceStop(string[currentPos]))
223 break;
224 if (isSentenceStop(string[currentPos]))
225 haveSpace = true;
226 --currentPos;
227 }
228 return currentPos;
229 }
230
wordBreakIterator(const UChar * string,int length)231 TextBreakIterator* wordBreakIterator(const UChar* string, int length)
232 {
233 DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ());
234 iterator.reset(string, length);
235 return &iterator;
236 }
237
characterBreakIterator(const UChar * string,int length)238 TextBreakIterator* characterBreakIterator(const UChar* string, int length)
239 {
240 DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ());
241 iterator.reset(string, length);
242 return &iterator;
243 }
244
245 static TextBreakIterator* staticLineBreakIterator;
246
acquireLineBreakIterator(const UChar * string,int length)247 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length)
248 {
249 TextBreakIterator* lineBreakIterator = 0;
250 if (staticLineBreakIterator) {
251 staticLineBreakIterator->reset(string, length);
252 swap(staticLineBreakIterator, lineBreakIterator);
253 }
254
255 if (!lineBreakIterator && string && length) {
256 lineBreakIterator = new LineBreakIterator;
257 lineBreakIterator->reset(string, length);
258 }
259
260 return lineBreakIterator;
261 }
262
releaseLineBreakIterator(TextBreakIterator * iterator)263 void releaseLineBreakIterator(TextBreakIterator* iterator)
264 {
265 ASSERT(iterator);
266
267 if (!staticLineBreakIterator)
268 staticLineBreakIterator = iterator;
269 else
270 delete iterator;
271 }
272
sentenceBreakIterator(const UChar * string,int length)273 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
274 {
275 DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ());
276 iterator.reset(string, length);
277 return &iterator;
278 }
279
textBreakFirst(TextBreakIterator * breakIterator)280 int textBreakFirst(TextBreakIterator* breakIterator)
281 {
282 return breakIterator->first();
283 }
284
textBreakLast(TextBreakIterator * breakIterator)285 int textBreakLast(TextBreakIterator* breakIterator)
286 {
287 return breakIterator->last();
288 }
289
textBreakNext(TextBreakIterator * breakIterator)290 int textBreakNext(TextBreakIterator* breakIterator)
291 {
292 return breakIterator->next();
293 }
294
textBreakPrevious(TextBreakIterator * breakIterator)295 int textBreakPrevious(TextBreakIterator* breakIterator)
296 {
297 return breakIterator->previous();
298 }
299
textBreakPreceding(TextBreakIterator * breakIterator,int position)300 int textBreakPreceding(TextBreakIterator* breakIterator, int position)
301 {
302 return breakIterator->preceding(position);
303 }
304
textBreakFollowing(TextBreakIterator * breakIterator,int position)305 int textBreakFollowing(TextBreakIterator* breakIterator, int position)
306 {
307 return breakIterator->following(position);
308 }
309
textBreakCurrent(TextBreakIterator * breakIterator)310 int textBreakCurrent(TextBreakIterator* breakIterator)
311 {
312 return breakIterator->currentPos;
313 }
314
isTextBreak(TextBreakIterator *,int)315 bool isTextBreak(TextBreakIterator*, int)
316 {
317 return true;
318 }
319
cursorMovementIterator(const UChar * string,int length)320 TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
321 {
322 return characterBreakIterator(string, length);
323 }
324
325 } // namespace WebCore
326