1 /*
2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef TextIterator_h
27 #define TextIterator_h
28
29 #include "core/dom/Range.h"
30 #include "core/editing/FindOptions.h"
31 #include "wtf/Vector.h"
32
33 namespace WebCore {
34
35 class InlineTextBox;
36 class RenderText;
37 class RenderTextFragment;
38
// Option bits accepted by the text iterators declared in this header. Every non-default
// enumerator occupies its own bit, so several of them can be combined in a single
// TextIteratorBehaviorFlags value.
enum TextIteratorBehavior {
    TextIteratorDefaultBehavior = 0x00,
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 0x01,
    TextIteratorEntersTextControls = 0x02,
    TextIteratorIgnoresStyleVisibility = 0x04,
    TextIteratorEmitsOriginalText = 0x08,
    TextIteratorStopsOnFormControls = 0x10,
    TextIteratorEmitsImageAltText = 0x20,
    TextIteratorEntersAuthorShadowRoots = 0x40
};

// Carrier type for a combination of TextIteratorBehavior bits.
typedef unsigned TextIteratorBehaviorFlags;
50
51 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)53 inline bool isCollapsibleWhitespace(UChar c)
54 {
55 switch (c) {
56 case ' ':
57 case '\n':
58 return true;
59 default:
60 return false;
61 }
62 }
63
64 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
65 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
66
67 class BitStack {
68 public:
69 BitStack();
70 ~BitStack();
71
72 void push(bool);
73 void pop();
74
75 bool top() const;
76 unsigned size() const;
77
78 private:
79 unsigned m_size;
80 Vector<unsigned, 1> m_words;
81 };
82
83 // Iterates through the DOM range, returning all the text, and 0-length boundaries
84 // at points where replaced elements break up the text flow. The text comes back in
85 // chunks so as to optimize for performance of the iteration.
86
87 class TextIterator {
88 public:
89 explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
90 ~TextIterator();
91
atEnd()92 bool atEnd() const { return !m_positionNode || m_shouldStop; }
93 void advance();
94
length()95 int length() const { return m_textLength; }
96 UChar characterAt(unsigned index) const;
97 String substring(unsigned position, unsigned length) const;
98 void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const;
99
100 template<typename BufferType>
101 void appendTextTo(BufferType& output, unsigned position = 0)
102 {
103 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length()));
104 unsigned lengthToAppend = length() - position;
105 if (!lengthToAppend)
106 return;
107 if (m_singleCharacterBuffer) {
108 ASSERT(!position);
109 ASSERT(length() == 1);
110 output.append(&m_singleCharacterBuffer, 1);
111 } else {
112 string().appendTo(output, startOffset() + position, lengthToAppend);
113 }
114 }
115
116 PassRefPtr<Range> range() const;
117 Node* node() const;
118
119 static int rangeLength(const Range*, bool spacesForReplacedElements = false);
120 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
121
122 private:
123 enum IterationProgress {
124 HandledNone,
125 HandledAuthorShadowRoots,
126 HandledUserAgentShadowRoot,
127 HandledNode,
128 HandledChildren
129 };
130
startOffset()131 int startOffset() const { return m_positionStartOffset; }
string()132 const String& string() const { return m_text; }
133 void exitNode();
134 bool shouldRepresentNodeOffsetZero();
135 bool shouldEmitSpaceBeforeAndAfterNode(Node*);
136 void representNodeOffsetZero();
137 bool handleTextNode();
138 bool handleReplacedElement();
139 bool handleNonTextNode();
140 void handleTextBox();
141 void handleTextNodeFirstLetter(RenderTextFragment*);
142 bool hasVisibleTextNode(RenderText*);
143 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
144 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
145 void emitText(Node* textNode, int textStartOffset, int textEndOffset);
146
147 // Current position, not necessarily of the text being returned, but position
148 // as we walk through the DOM tree.
149 Node* m_node;
150 int m_offset;
151 IterationProgress m_iterationProgress;
152 BitStack m_fullyClippedStack;
153 int m_shadowDepth;
154
155 // The range.
156 Node* m_startContainer;
157 int m_startOffset;
158 Node* m_endContainer;
159 int m_endOffset;
160 Node* m_pastEndNode;
161
162 // The current text and its position, in the form to be returned from the iterator.
163 Node* m_positionNode;
164 mutable Node* m_positionOffsetBaseNode;
165 mutable int m_positionStartOffset;
166 mutable int m_positionEndOffset;
167 int m_textLength;
168 String m_text;
169
170 // Used when there is still some pending text from the current node; when these
171 // are false and 0, we go back to normal iterating.
172 bool m_needsAnotherNewline;
173 InlineTextBox* m_textBox;
174 // Used when iteration over :first-letter text to save pointer to
175 // remaining text box.
176 InlineTextBox* m_remainingTextBox;
177 // Used to point to RenderText object for :first-letter.
178 RenderText *m_firstLetterText;
179
180 // Used to do the whitespace collapsing logic.
181 Node* m_lastTextNode;
182 bool m_lastTextNodeEndedWithCollapsedSpace;
183 UChar m_lastCharacter;
184
185 // Used for whitespace characters that aren't in the DOM, so we can point at them.
186 // If non-zero, overrides m_text.
187 UChar m_singleCharacterBuffer;
188
189 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
190 Vector<InlineTextBox*> m_sortedTextBoxes;
191 size_t m_sortedTextBoxesPosition;
192
193 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
194 bool m_hasEmitted;
195
196 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
197 // in the Range used to create the TextIterator.
198 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
199 // moveParagraphs to not clone/destroy moved content.
200 bool m_emitsCharactersBetweenAllVisiblePositions;
201 bool m_entersTextControls;
202
203 // Used in pasting inside password field.
204 bool m_emitsOriginalText;
205 // Used when deciding text fragment created by :first-letter should be looked into.
206 bool m_handledFirstLetter;
207 // Used when the visibility of the style should not affect text gathering.
208 bool m_ignoresStyleVisibility;
209 // Used when the iteration should stop if form controls are reached.
210 bool m_stopsOnFormControls;
211 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
212 bool m_shouldStop;
213
214 bool m_emitsImageAltText;
215
216 bool m_entersAuthorShadowRoots;
217 };
218
219 // Iterates through the DOM range, returning all the text, and 0-length boundaries
220 // at points where replaced elements break up the text flow. The text comes back in
221 // chunks so as to optimize for performance of the iteration.
222 class SimplifiedBackwardsTextIterator {
223 public:
224 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
225
atEnd()226 bool atEnd() const { return !m_positionNode || m_shouldStop; }
227 void advance();
228
length()229 int length() const { return m_textLength; }
230
node()231 Node* node() const { return m_node; }
232
233 template<typename BufferType>
prependTextTo(BufferType & output)234 void prependTextTo(BufferType& output)
235 {
236 if (!m_textLength)
237 return;
238 if (m_singleCharacterBuffer)
239 output.prepend(&m_singleCharacterBuffer, 1);
240 else
241 m_textContainer.prependTo(output, m_textOffset, m_textLength);
242 }
243
244 PassRefPtr<Range> range() const;
245
246 private:
247 void exitNode();
248 bool handleTextNode();
249 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
250 bool handleReplacedElement();
251 bool handleNonTextNode();
252 void emitCharacter(UChar, Node*, int startOffset, int endOffset);
253 bool advanceRespectingRange(Node*);
254
255 // Current position, not necessarily of the text being returned, but position
256 // as we walk through the DOM tree.
257 Node* m_node;
258 int m_offset;
259 bool m_handledNode;
260 bool m_handledChildren;
261 BitStack m_fullyClippedStack;
262
263 // End of the range.
264 Node* m_startNode;
265 int m_startOffset;
266 // Start of the range.
267 Node* m_endNode;
268 int m_endOffset;
269
270 // The current text and its position, in the form to be returned from the iterator.
271 Node* m_positionNode;
272 int m_positionStartOffset;
273 int m_positionEndOffset;
274
275 String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer.
276 int m_textOffset;
277 int m_textLength;
278
279 // Used to do the whitespace logic.
280 Node* m_lastTextNode;
281 UChar m_lastCharacter;
282
283 // Used for whitespace characters that aren't in the DOM, so we can point at them.
284 UChar m_singleCharacterBuffer;
285
286 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
287 bool m_havePassedStartNode;
288
289 // Should handle first-letter renderer in the next call to handleTextNode.
290 bool m_shouldHandleFirstLetter;
291
292 // Used when the iteration should stop if form controls are reached.
293 bool m_stopsOnFormControls;
294
295 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
296 bool m_shouldStop;
297
298 // Used in pasting inside password field.
299 bool m_emitsOriginalText;
300 };
301
302 // Builds on the text iterator, adding a character position so we can walk one
303 // character at a time, or faster, as needed. Useful for searching.
304 class CharacterIterator {
305 public:
306 explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
307
308 void advance(int numCharacters);
309
atBreak()310 bool atBreak() const { return m_atBreak; }
atEnd()311 bool atEnd() const { return m_textIterator.atEnd(); }
312
length()313 int length() const { return m_textIterator.length() - m_runOffset; }
characterAt(unsigned index)314 UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); }
315
316 template<typename BufferType>
appendTextTo(BufferType & output)317 void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); }
318
319 String string(int numChars);
320
characterOffset()321 int characterOffset() const { return m_offset; }
322 PassRefPtr<Range> range() const;
323
324 private:
325 int m_offset;
326 int m_runOffset;
327 bool m_atBreak;
328
329 TextIterator m_textIterator;
330 };
331
332 class BackwardsCharacterIterator {
333 public:
334 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
335
336 void advance(int);
337
atEnd()338 bool atEnd() const { return m_textIterator.atEnd(); }
339
340 PassRefPtr<Range> range() const;
341
342 private:
343 int m_offset;
344 int m_runOffset;
345 bool m_atBreak;
346
347 SimplifiedBackwardsTextIterator m_textIterator;
348 };
349
// Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
// meaning they never split up a word. This is useful for spellcheck or (perhaps one day) searching.
352 class WordAwareIterator {
353 public:
354 explicit WordAwareIterator(const Range*);
355 ~WordAwareIterator();
356
atEnd()357 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
358 void advance();
359
360 String substring(unsigned position, unsigned length) const;
361 UChar characterAt(unsigned index) const;
362 int length() const;
363
364 // Range of the text we're currently returning
range()365 PassRefPtr<Range> range() const { return m_range; }
366
367 private:
368 Vector<UChar> m_buffer;
369 // Did we have to look ahead in the textIterator to confirm the current chunk?
370 bool m_didLookAhead;
371 RefPtr<Range> m_range;
372 TextIterator m_textIterator;
373 };
374
375 }
376
377 #endif
378