1 /*
2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef TextIterator_h
27 #define TextIterator_h
28
29 #include "FindOptions.h"
30 #include "InlineTextBox.h"
31 #include "Range.h"
32 #include <wtf/Vector.h>
33
34 namespace WebCore {
35
// Forward declarations: this header only refers to these rendering classes
// through pointers, so their full definitions are not needed here.
class RenderText;
class RenderTextFragment;
38
// Options accepted by the constructors of the text iterator classes in this header.
// TextIteratorDefaultBehavior is zero; every other enumerator occupies its own bit,
// so no two options share a value.
enum TextIteratorBehavior {
    TextIteratorDefaultBehavior = 0,
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    TextIteratorEntersTextControls = 1 << 1,
    TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
    TextIteratorIgnoresStyleVisibility = 1 << 3,
    TextIteratorEmitsObjectReplacementCharacters = 1 << 4
};
47
48 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
49 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)50 inline bool isCollapsibleWhitespace(UChar c)
51 {
52 switch (c) {
53 case ' ':
54 case '\n':
55 return true;
56 default:
57 return false;
58 }
59 }
60
61 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
62 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
63 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
64
65 class BitStack {
66 public:
67 BitStack();
68 ~BitStack();
69
70 void push(bool);
71 void pop();
72
73 bool top() const;
74 unsigned size() const;
75
76 private:
77 unsigned m_size;
78 Vector<unsigned, 1> m_words;
79 };
80
81 // Iterates through the DOM range, returning all the text, and 0-length boundaries
82 // at points where replaced elements break up the text flow. The text comes back in
83 // chunks so as to optimize for performance of the iteration.
84
85 class TextIterator {
86 public:
87 TextIterator();
88 ~TextIterator();
89 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
90
atEnd()91 bool atEnd() const { return !m_positionNode; }
92 void advance();
93
length()94 int length() const { return m_textLength; }
characters()95 const UChar* characters() const { return m_textCharacters; }
96
97 PassRefPtr<Range> range() const;
98 Node* node() const;
99
100 static int rangeLength(const Range*, bool spacesForReplacedElements = false);
101 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
102 static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length);
103 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
104
105 private:
106 void exitNode();
107 bool shouldRepresentNodeOffsetZero();
108 bool shouldEmitSpaceBeforeAndAfterNode(Node*);
109 void representNodeOffsetZero();
110 bool handleTextNode();
111 bool handleReplacedElement();
112 bool handleNonTextNode();
113 void handleTextBox();
114 void handleTextNodeFirstLetter(RenderTextFragment*);
115 bool hasVisibleTextNode(RenderText*);
116 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
117 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
118 void emitText(Node* textNode, int textStartOffset, int textEndOffset);
119
120 // Current position, not necessarily of the text being returned, but position
121 // as we walk through the DOM tree.
122 Node* m_node;
123 int m_offset;
124 bool m_handledNode;
125 bool m_handledChildren;
126 BitStack m_fullyClippedStack;
127
128 // The range.
129 Node* m_startContainer;
130 int m_startOffset;
131 Node* m_endContainer;
132 int m_endOffset;
133 Node* m_pastEndNode;
134
135 // The current text and its position, in the form to be returned from the iterator.
136 Node* m_positionNode;
137 mutable Node* m_positionOffsetBaseNode;
138 mutable int m_positionStartOffset;
139 mutable int m_positionEndOffset;
140 const UChar* m_textCharacters;
141 int m_textLength;
142 // Hold string m_textCharacters points to so we ensure it won't be deleted.
143 String m_text;
144
145 // Used when there is still some pending text from the current node; when these
146 // are false and 0, we go back to normal iterating.
147 bool m_needsAnotherNewline;
148 InlineTextBox* m_textBox;
149 // Used when iteration over :first-letter text to save pointer to
150 // remaining text box.
151 InlineTextBox* m_remainingTextBox;
152 // Used to point to RenderText object for :first-letter.
153 RenderText *m_firstLetterText;
154
155 // Used to do the whitespace collapsing logic.
156 Node* m_lastTextNode;
157 bool m_lastTextNodeEndedWithCollapsedSpace;
158 UChar m_lastCharacter;
159
160 // Used for whitespace characters that aren't in the DOM, so we can point at them.
161 UChar m_singleCharacterBuffer;
162
163 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
164 Vector<InlineTextBox*> m_sortedTextBoxes;
165 size_t m_sortedTextBoxesPosition;
166
167 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
168 bool m_hasEmitted;
169
170 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
171 // in the Range used to create the TextIterator.
172 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
173 // moveParagraphs to not clone/destroy moved content.
174 bool m_emitsCharactersBetweenAllVisiblePositions;
175 bool m_entersTextControls;
176
177 // Used when we want texts for copying, pasting, and transposing.
178 bool m_emitsTextWithoutTranscoding;
179 // Used when deciding text fragment created by :first-letter should be looked into.
180 bool m_handledFirstLetter;
181 // Used when the visibility of the style should not affect text gathering.
182 bool m_ignoresStyleVisibility;
183 // Used when emitting the special 0xFFFC character is required.
184 bool m_emitsObjectReplacementCharacters;
185 };
186
187 // Iterates through the DOM range, returning all the text, and 0-length boundaries
188 // at points where replaced elements break up the text flow. The text comes back in
189 // chunks so as to optimize for performance of the iteration.
190 class SimplifiedBackwardsTextIterator {
191 public:
192 SimplifiedBackwardsTextIterator();
193 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
194
atEnd()195 bool atEnd() const { return !m_positionNode; }
196 void advance();
197
length()198 int length() const { return m_textLength; }
characters()199 const UChar* characters() const { return m_textCharacters; }
200
201 PassRefPtr<Range> range() const;
202
203 private:
204 void exitNode();
205 bool handleTextNode();
206 bool handleReplacedElement();
207 bool handleNonTextNode();
208 void emitCharacter(UChar, Node*, int startOffset, int endOffset);
209 bool advanceRespectingRange(Node*);
210
211 TextIteratorBehavior m_behavior;
212 // Current position, not necessarily of the text being returned, but position
213 // as we walk through the DOM tree.
214 Node* m_node;
215 int m_offset;
216 bool m_handledNode;
217 bool m_handledChildren;
218 BitStack m_fullyClippedStack;
219
220 // End of the range.
221 Node* m_startNode;
222 int m_startOffset;
223 // Start of the range.
224 Node* m_endNode;
225 int m_endOffset;
226
227 // The current text and its position, in the form to be returned from the iterator.
228 Node* m_positionNode;
229 int m_positionStartOffset;
230 int m_positionEndOffset;
231 const UChar* m_textCharacters;
232 int m_textLength;
233
234 // Used to do the whitespace logic.
235 Node* m_lastTextNode;
236 UChar m_lastCharacter;
237
238 // Used for whitespace characters that aren't in the DOM, so we can point at them.
239 UChar m_singleCharacterBuffer;
240
241 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
242 bool m_havePassedStartNode;
243 };
244
245 // Builds on the text iterator, adding a character position so we can walk one
246 // character at a time, or faster, as needed. Useful for searching.
247 class CharacterIterator {
248 public:
249 CharacterIterator();
250 explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
251
252 void advance(int numCharacters);
253
atBreak()254 bool atBreak() const { return m_atBreak; }
atEnd()255 bool atEnd() const { return m_textIterator.atEnd(); }
256
length()257 int length() const { return m_textIterator.length() - m_runOffset; }
characters()258 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
259 String string(int numChars);
260
characterOffset()261 int characterOffset() const { return m_offset; }
262 PassRefPtr<Range> range() const;
263
264 private:
265 int m_offset;
266 int m_runOffset;
267 bool m_atBreak;
268
269 TextIterator m_textIterator;
270 };
271
272 class BackwardsCharacterIterator {
273 public:
274 BackwardsCharacterIterator();
275 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
276
277 void advance(int);
278
atEnd()279 bool atEnd() const { return m_textIterator.atEnd(); }
280
281 PassRefPtr<Range> range() const;
282
283 private:
284 TextIteratorBehavior m_behavior;
285 int m_offset;
286 int m_runOffset;
287 bool m_atBreak;
288
289 SimplifiedBackwardsTextIterator m_textIterator;
290 };
291
292 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
293 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching.
294 class WordAwareIterator {
295 public:
296 WordAwareIterator();
297 explicit WordAwareIterator(const Range*);
298 ~WordAwareIterator();
299
atEnd()300 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
301 void advance();
302
303 int length() const;
304 const UChar* characters() const;
305
306 // Range of the text we're currently returning
range()307 PassRefPtr<Range> range() const { return m_range; }
308
309 private:
310 // text from the previous chunk from the textIterator
311 const UChar* m_previousText;
312 int m_previousLength;
313
314 // many chunks from textIterator concatenated
315 Vector<UChar> m_buffer;
316
317 // Did we have to look ahead in the textIterator to confirm the current chunk?
318 bool m_didLookAhead;
319
320 RefPtr<Range> m_range;
321
322 TextIterator m_textIterator;
323 };
324
325 }
326
327 #endif
328