1 /*
2 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
3 * Copyright (C) 2007-2009 Torch Mobile, Inc.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 * Boston, MA 02111-1307, USA.
19 *
20 */
21
22 #include "config.h"
23 #include "TextBreakIterator.h"
24
25 #include "PlatformString.h"
26 #include <wtf/StdLibExtras.h>
27 #include <wtf/unicode/Unicode.h>
28
29 using namespace std;
30 using namespace WTF::Unicode;
31
32 namespace WebCore {
33
34 // Hack, not entirely correct
isCharStop(UChar c)35 static inline bool isCharStop(UChar c)
36 {
37 CharCategory charCategory = category(c);
38 return charCategory != Mark_NonSpacing && (charCategory != Other_Surrogate || (c < 0xd800 || c >= 0xdc00));
39 }
40
isLineStop(UChar c)41 static inline bool isLineStop(UChar c)
42 {
43 return category(c) != Separator_Line;
44 }
45
isSentenceStop(UChar c)46 static inline bool isSentenceStop(UChar c)
47 {
48 return isPunct(c);
49 }
50
51 class TextBreakIterator {
52 public:
reset(const UChar * str,int len)53 void reset(const UChar* str, int len)
54 {
55 string = str;
56 length = len;
57 currentPos = 0;
58 }
59 virtual int first() = 0;
60 virtual int next() = 0;
61 virtual int previous() = 0;
following(int position)62 int following(int position)
63 {
64 currentPos = position;
65 return next();
66 }
preceding(int position)67 int preceding(int position)
68 {
69 currentPos = position;
70 return previous();
71 }
72
73 int currentPos;
74 const UChar* string;
75 int length;
76 };
77
78 struct WordBreakIterator: TextBreakIterator {
79 virtual int first();
80 virtual int next();
81 virtual int previous();
82 };
83
84 struct CharBreakIterator: TextBreakIterator {
85 virtual int first();
86 virtual int next();
87 virtual int previous();
88 };
89
90 struct LineBreakIterator: TextBreakIterator {
91 virtual int first();
92 virtual int next();
93 virtual int previous();
94 };
95
96 struct SentenceBreakIterator : TextBreakIterator {
97 virtual int first();
98 virtual int next();
99 virtual int previous();
100 };
101
first()102 int WordBreakIterator::first()
103 {
104 currentPos = 0;
105 return currentPos;
106 }
107
next()108 int WordBreakIterator::next()
109 {
110 if (currentPos == length) {
111 currentPos = -1;
112 return currentPos;
113 }
114 bool haveSpace = false;
115 while (currentPos < length) {
116 if (haveSpace && !isSpace(string[currentPos]))
117 break;
118 if (isSpace(string[currentPos]))
119 haveSpace = true;
120 ++currentPos;
121 }
122 return currentPos;
123 }
124
previous()125 int WordBreakIterator::previous()
126 {
127 if (!currentPos) {
128 currentPos = -1;
129 return currentPos;
130 }
131 bool haveSpace = false;
132 while (currentPos > 0) {
133 if (haveSpace && !isSpace(string[currentPos]))
134 break;
135 if (isSpace(string[currentPos]))
136 haveSpace = true;
137 --currentPos;
138 }
139 return currentPos;
140 }
141
first()142 int CharBreakIterator::first()
143 {
144 currentPos = 0;
145 return currentPos;
146 }
147
next()148 int CharBreakIterator::next()
149 {
150 if (currentPos >= length)
151 return -1;
152 ++currentPos;
153 while (currentPos < length && !isCharStop(string[currentPos]))
154 ++currentPos;
155 return currentPos;
156 }
157
previous()158 int CharBreakIterator::previous()
159 {
160 if (currentPos <= 0)
161 return -1;
162 if (currentPos > length)
163 currentPos = length;
164 --currentPos;
165 while (currentPos > 0 && !isCharStop(string[currentPos]))
166 --currentPos;
167 return currentPos;
168 }
169
first()170 int LineBreakIterator::first()
171 {
172 currentPos = 0;
173 return currentPos;
174 }
175
next()176 int LineBreakIterator::next()
177 {
178 if (currentPos == length) {
179 currentPos = -1;
180 return currentPos;
181 }
182 bool haveSpace = false;
183 while (currentPos < length) {
184 if (haveSpace && !isLineStop(string[currentPos]))
185 break;
186 if (isLineStop(string[currentPos]))
187 haveSpace = true;
188 ++currentPos;
189 }
190 return currentPos;
191 }
192
previous()193 int LineBreakIterator::previous()
194 {
195 if (!currentPos) {
196 currentPos = -1;
197 return currentPos;
198 }
199 bool haveSpace = false;
200 while (currentPos > 0) {
201 if (haveSpace && !isLineStop(string[currentPos]))
202 break;
203 if (isLineStop(string[currentPos]))
204 haveSpace = true;
205 --currentPos;
206 }
207 return currentPos;
208 }
209
first()210 int SentenceBreakIterator::first()
211 {
212 currentPos = 0;
213 return currentPos;
214 }
215
next()216 int SentenceBreakIterator::next()
217 {
218 if (currentPos == length) {
219 currentPos = -1;
220 return currentPos;
221 }
222 bool haveSpace = false;
223 while (currentPos < length) {
224 if (haveSpace && !isSentenceStop(string[currentPos]))
225 break;
226 if (isSentenceStop(string[currentPos]))
227 haveSpace = true;
228 ++currentPos;
229 }
230 return currentPos;
231 }
232
previous()233 int SentenceBreakIterator::previous()
234 {
235 if (!currentPos) {
236 currentPos = -1;
237 return currentPos;
238 }
239 bool haveSpace = false;
240 while (currentPos > 0) {
241 if (haveSpace && !isSentenceStop(string[currentPos]))
242 break;
243 if (isSentenceStop(string[currentPos]))
244 haveSpace = true;
245 --currentPos;
246 }
247 return currentPos;
248 }
249
wordBreakIterator(const UChar * string,int length)250 TextBreakIterator* wordBreakIterator(const UChar* string, int length)
251 {
252 DEFINE_STATIC_LOCAL(WordBreakIterator, iterator, ());
253 iterator.reset(string, length);
254 return &iterator;
255 }
256
characterBreakIterator(const UChar * string,int length)257 TextBreakIterator* characterBreakIterator(const UChar* string, int length)
258 {
259 DEFINE_STATIC_LOCAL(CharBreakIterator, iterator, ());
260 iterator.reset(string, length);
261 return &iterator;
262 }
263
264 static TextBreakIterator* staticLineBreakIterator;
265
acquireLineBreakIterator(const UChar * string,int length)266 TextBreakIterator* acquireLineBreakIterator(const UChar* string, int length)
267 {
268 TextBreakIterator* lineBreakIterator = 0;
269 if (staticLineBreakIterator) {
270 staticLineBreakIterator->reset(string, length);
271 swap(staticLineBreakIterator, lineBreakIterator);
272 }
273
274 if (!lineBreakIterator && string && length) {
275 lineBreakIterator = new LineBreakIterator;
276 lineBreakIterator->reset(string, length);
277 }
278
279 return lineBreakIterator;
280 }
281
releaseLineBreakIterator(TextBreakIterator * iterator)282 void releaseLineBreakIterator(TextBreakIterator* iterator)
283 {
284 ASSERT(iterator);
285
286 if (!staticLineBreakIterator)
287 staticLineBreakIterator = iterator;
288 else
289 delete iterator;
290 }
291
sentenceBreakIterator(const UChar * string,int length)292 TextBreakIterator* sentenceBreakIterator(const UChar* string, int length)
293 {
294 DEFINE_STATIC_LOCAL(SentenceBreakIterator, iterator, ());
295 iterator.reset(string, length);
296 return &iterator;
297 }
298
textBreakFirst(TextBreakIterator * breakIterator)299 int textBreakFirst(TextBreakIterator* breakIterator)
300 {
301 return breakIterator->first();
302 }
303
textBreakNext(TextBreakIterator * breakIterator)304 int textBreakNext(TextBreakIterator* breakIterator)
305 {
306 return breakIterator->next();
307 }
308
textBreakPreceding(TextBreakIterator * breakIterator,int position)309 int textBreakPreceding(TextBreakIterator* breakIterator, int position)
310 {
311 return breakIterator->preceding(position);
312 }
313
textBreakFollowing(TextBreakIterator * breakIterator,int position)314 int textBreakFollowing(TextBreakIterator* breakIterator, int position)
315 {
316 return breakIterator->following(position);
317 }
318
textBreakCurrent(TextBreakIterator * breakIterator)319 int textBreakCurrent(TextBreakIterator* breakIterator)
320 {
321 return breakIterator->currentPos;
322 }
323
isTextBreak(TextBreakIterator *,int)324 bool isTextBreak(TextBreakIterator*, int)
325 {
326 return true;
327 }
328
cursorMovementIterator(const UChar * string,int length)329 TextBreakIterator* cursorMovementIterator(const UChar* string, int length)
330 {
331 return characterBreakIterator(string, length);
332 }
333
334 } // namespace WebCore
335