1 /*
2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef TextIterator_h
27 #define TextIterator_h
28
29 #include "FindOptions.h"
30 #include "InlineTextBox.h"
31 #include "Range.h"
32 #include <wtf/Vector.h>
33
34 namespace WebCore {
35
// Forward declarations: this header only refers to these types through pointers.
class RenderText;
class RenderTextFragment;
38
39 enum TextIteratorBehavior {
40 TextIteratorDefaultBehavior = 0,
41 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
42 TextIteratorEntersTextControls = 1 << 1,
43 TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
44 TextIteratorIgnoresStyleVisibility = 1 << 3,
45 TextIteratorEmitsObjectReplacementCharacters = 1 << 4,
46 #if OS(ANDROID)
47 TextIteratorStopsOnFormControls = 1 << 6
48 #endif
49 };
50
51 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)53 inline bool isCollapsibleWhitespace(UChar c)
54 {
55 switch (c) {
56 case ' ':
57 case '\n':
58 return true;
59 default:
60 return false;
61 }
62 }
63
64 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
65 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
66 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
67
68 class BitStack {
69 public:
70 BitStack();
71 ~BitStack();
72
73 void push(bool);
74 void pop();
75
76 bool top() const;
77 unsigned size() const;
78
79 private:
80 unsigned m_size;
81 Vector<unsigned, 1> m_words;
82 };
83
84 // Iterates through the DOM range, returning all the text, and 0-length boundaries
85 // at points where replaced elements break up the text flow. The text comes back in
86 // chunks so as to optimize for performance of the iteration.
87
88 class TextIterator {
89 public:
90 TextIterator();
91 ~TextIterator();
92 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
93
94 bool atEnd() const;
95 void advance();
96
length()97 int length() const { return m_textLength; }
characters()98 const UChar* characters() const { return m_textCharacters; }
99
100 PassRefPtr<Range> range() const;
101 Node* node() const;
102
103 static int rangeLength(const Range*, bool spacesForReplacedElements = false);
104 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
105 static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length);
106 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
107
108 private:
109 void exitNode();
110 bool shouldRepresentNodeOffsetZero();
111 bool shouldEmitSpaceBeforeAndAfterNode(Node*);
112 void representNodeOffsetZero();
113 bool handleTextNode();
114 bool handleReplacedElement();
115 bool handleNonTextNode();
116 void handleTextBox();
117 void handleTextNodeFirstLetter(RenderTextFragment*);
118 bool hasVisibleTextNode(RenderText*);
119 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
120 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
121 void emitText(Node* textNode, int textStartOffset, int textEndOffset);
122
123 // Current position, not necessarily of the text being returned, but position
124 // as we walk through the DOM tree.
125 Node* m_node;
126 int m_offset;
127 bool m_handledNode;
128 bool m_handledChildren;
129 BitStack m_fullyClippedStack;
130
131 // The range.
132 Node* m_startContainer;
133 int m_startOffset;
134 Node* m_endContainer;
135 int m_endOffset;
136 Node* m_pastEndNode;
137
138 // The current text and its position, in the form to be returned from the iterator.
139 Node* m_positionNode;
140 mutable Node* m_positionOffsetBaseNode;
141 mutable int m_positionStartOffset;
142 mutable int m_positionEndOffset;
143 const UChar* m_textCharacters;
144 int m_textLength;
145 // Hold string m_textCharacters points to so we ensure it won't be deleted.
146 String m_text;
147
148 // Used when there is still some pending text from the current node; when these
149 // are false and 0, we go back to normal iterating.
150 bool m_needsAnotherNewline;
151 InlineTextBox* m_textBox;
152 // Used when iteration over :first-letter text to save pointer to
153 // remaining text box.
154 InlineTextBox* m_remainingTextBox;
155 // Used to point to RenderText object for :first-letter.
156 RenderText *m_firstLetterText;
157
158 // Used to do the whitespace collapsing logic.
159 Node* m_lastTextNode;
160 bool m_lastTextNodeEndedWithCollapsedSpace;
161 UChar m_lastCharacter;
162
163 // Used for whitespace characters that aren't in the DOM, so we can point at them.
164 UChar m_singleCharacterBuffer;
165
166 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
167 Vector<InlineTextBox*> m_sortedTextBoxes;
168 size_t m_sortedTextBoxesPosition;
169
170 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
171 bool m_hasEmitted;
172
173 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
174 // in the Range used to create the TextIterator.
175 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
176 // moveParagraphs to not clone/destroy moved content.
177 bool m_emitsCharactersBetweenAllVisiblePositions;
178 bool m_entersTextControls;
179
180 // Used when we want texts for copying, pasting, and transposing.
181 bool m_emitsTextWithoutTranscoding;
182 // Used when deciding text fragment created by :first-letter should be looked into.
183 bool m_handledFirstLetter;
184 // Used when the visibility of the style should not affect text gathering.
185 bool m_ignoresStyleVisibility;
186 // Used when emitting the special 0xFFFC character is required.
187 bool m_emitsObjectReplacementCharacters;
188 #if OS(ANDROID)
189 // Used when the iteration should stop if form controls are reached.
190 bool m_stopsOnFormControls;
191 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
192 bool m_shouldStop;
193 #endif
194 };
195
196 // Iterates through the DOM range, returning all the text, and 0-length boundaries
197 // at points where replaced elements break up the text flow. The text comes back in
198 // chunks so as to optimize for performance of the iteration.
199 class SimplifiedBackwardsTextIterator {
200 public:
201 SimplifiedBackwardsTextIterator();
202 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
203
204 bool atEnd() const;
205 void advance();
206
length()207 int length() const { return m_textLength; }
characters()208 const UChar* characters() const { return m_textCharacters; }
209
210 PassRefPtr<Range> range() const;
211
212 private:
213 void exitNode();
214 bool handleTextNode();
215 bool handleReplacedElement();
216 bool handleNonTextNode();
217 void emitCharacter(UChar, Node*, int startOffset, int endOffset);
218 bool advanceRespectingRange(Node*);
219
220 TextIteratorBehavior m_behavior;
221 // Current position, not necessarily of the text being returned, but position
222 // as we walk through the DOM tree.
223 Node* m_node;
224 int m_offset;
225 bool m_handledNode;
226 bool m_handledChildren;
227 BitStack m_fullyClippedStack;
228
229 // End of the range.
230 Node* m_startNode;
231 int m_startOffset;
232 // Start of the range.
233 Node* m_endNode;
234 int m_endOffset;
235
236 // The current text and its position, in the form to be returned from the iterator.
237 Node* m_positionNode;
238 int m_positionStartOffset;
239 int m_positionEndOffset;
240 const UChar* m_textCharacters;
241 int m_textLength;
242
243 // Used to do the whitespace logic.
244 Node* m_lastTextNode;
245 UChar m_lastCharacter;
246
247 // Used for whitespace characters that aren't in the DOM, so we can point at them.
248 UChar m_singleCharacterBuffer;
249
250 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
251 bool m_havePassedStartNode;
252
253 #if OS(ANDROID)
254 // Used when the iteration should stop if form controls are reached.
255 bool m_stopsOnFormControls;
256 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
257 bool m_shouldStop;
258 #endif
259 };
260
261 // Builds on the text iterator, adding a character position so we can walk one
262 // character at a time, or faster, as needed. Useful for searching.
263 class CharacterIterator {
264 public:
265 CharacterIterator();
266 explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
267
268 void advance(int numCharacters);
269
atBreak()270 bool atBreak() const { return m_atBreak; }
atEnd()271 bool atEnd() const { return m_textIterator.atEnd(); }
272
length()273 int length() const { return m_textIterator.length() - m_runOffset; }
characters()274 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
275 String string(int numChars);
276
characterOffset()277 int characterOffset() const { return m_offset; }
278 PassRefPtr<Range> range() const;
279
280 private:
281 int m_offset;
282 int m_runOffset;
283 bool m_atBreak;
284
285 TextIterator m_textIterator;
286 };
287
288 class BackwardsCharacterIterator {
289 public:
290 BackwardsCharacterIterator();
291 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
292
293 void advance(int);
294
atEnd()295 bool atEnd() const { return m_textIterator.atEnd(); }
296
297 PassRefPtr<Range> range() const;
298
299 private:
300 TextIteratorBehavior m_behavior;
301 int m_offset;
302 int m_runOffset;
303 bool m_atBreak;
304
305 SimplifiedBackwardsTextIterator m_textIterator;
306 };
307
308 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
309 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching.
310 class WordAwareIterator {
311 public:
312 WordAwareIterator();
313 explicit WordAwareIterator(const Range*);
314 ~WordAwareIterator();
315
atEnd()316 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
317 void advance();
318
319 int length() const;
320 const UChar* characters() const;
321
322 // Range of the text we're currently returning
range()323 PassRefPtr<Range> range() const { return m_range; }
324
325 private:
326 // text from the previous chunk from the textIterator
327 const UChar* m_previousText;
328 int m_previousLength;
329
330 // many chunks from textIterator concatenated
331 Vector<UChar> m_buffer;
332
333 // Did we have to look ahead in the textIterator to confirm the current chunk?
334 bool m_didLookAhead;
335
336 RefPtr<Range> m_range;
337
338 TextIterator m_textIterator;
339 };
340
341 }
342
343 #endif
344