/*
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

26 #ifndef TextIterator_h
27 #define TextIterator_h
28 
29 #include "core/dom/Range.h"
30 #include "core/editing/FindOptions.h"
31 #include "platform/heap/Handle.h"
32 #include "wtf/Vector.h"
33 
34 namespace blink {
35 
36 class InlineTextBox;
37 class RenderText;
38 class RenderTextFragment;
39 
// Option flags accepted by the text iterators and by plainText(). Every
// enumerator other than the default occupies its own bit, so several options
// can be OR-ed together into a single TextIteratorBehaviorFlags value.
enum TextIteratorBehavior {
    TextIteratorDefaultBehavior = 0,
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    TextIteratorEntersTextControls = 1 << 1,
    TextIteratorIgnoresStyleVisibility = 1 << 2,
    TextIteratorEmitsOriginalText = 1 << 3,
    TextIteratorStopsOnFormControls = 1 << 4,
    TextIteratorEmitsImageAltText = 1 << 5,
    TextIteratorEntersAuthorShadowRoots = 1 << 6,
    TextIteratorEmitsObjectReplacementCharacter = 1 << 7
};
// Holds a bitwise combination of TextIteratorBehavior values.
typedef unsigned TextIteratorBehaviorFlags;

53 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
54 String plainText(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
55 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range*, const String&, FindOptions);
56 void findPlainText(const Position& inputStart, const Position& inputEnd, const String&, FindOptions, Position& resultStart, Position& resultEnd);
57 
58 class BitStack {
59 public:
60     BitStack();
61     ~BitStack();
62 
63     void push(bool);
64     void pop();
65 
66     bool top() const;
67     unsigned size() const;
68 
69 private:
70     unsigned m_size;
71     Vector<unsigned, 1> m_words;
72 };
73 
// Iterates through the DOM range, returning all the text, and 0-length boundaries
// at points where replaced elements break up the text flow.  The text comes back in
// chunks so as to optimize for performance of the iteration.

78 class TextIterator {
79     STACK_ALLOCATED();
80 public:
81     explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
82     // [start, end] indicates the document range that the iteration should take place within (both ends inclusive).
83     TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
84     ~TextIterator();
85 
atEnd()86     bool atEnd() const { return !m_positionNode || m_shouldStop; }
87     void advance();
88     bool isInsideReplacedElement() const;
89 
length()90     int length() const { return m_textLength; }
91     UChar characterAt(unsigned index) const;
92     String substring(unsigned position, unsigned length) const;
93     void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const;
94 
95     template<typename BufferType>
96     void appendTextTo(BufferType& output, unsigned position = 0)
97     {
98         ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length()));
99         unsigned lengthToAppend = length() - position;
100         if (!lengthToAppend)
101             return;
102         if (m_singleCharacterBuffer) {
103             ASSERT(!position);
104             ASSERT(length() == 1);
105             output.append(&m_singleCharacterBuffer, 1);
106         } else {
107             string().appendTo(output, startOffset() + position, lengthToAppend);
108         }
109     }
110 
111     PassRefPtrWillBeRawPtr<Range> createRange() const;
112     Node* node() const;
113 
114     Document* ownerDocument() const;
115     Node* startContainer() const;
116     Node* endContainer() const;
117     int startOffset() const;
118     int endOffset() const;
119     Position startPosition() const;
120     Position endPosition() const;
121 
122     // Computes the length of the given range using a text iterator. The default
123     // iteration behavior is to always emit object replacement characters for
124     // replaced elements. When |forSelectionPreservation| is set to true, it
125     // also emits spaces for other non-text nodes using the
126     // |TextIteratorEmitsCharactersBetweenAllVisiblePosition| mode.
127     static int rangeLength(const Range*, bool forSelectionPreservation = false);
128     static int rangeLength(const Position& start, const Position& end, bool forSelectionPreservation = false);
129     static PassRefPtrWillBeRawPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
130     static void subrange(Position& start, Position& end, int characterOffset, int characterCount);
131 
132 private:
133     enum IterationProgress {
134         HandledNone,
135         HandledAuthorShadowRoots,
136         HandledUserAgentShadowRoot,
137         HandledNode,
138         HandledChildren
139     };
140 
141     void initialize(const Position& start, const Position& end);
142 
143     void flushPositionOffsets() const;
positionStartOffset()144     int positionStartOffset() const { return m_positionStartOffset; }
string()145     const String& string() const { return m_text; }
146     void exitNode();
147     bool shouldRepresentNodeOffsetZero();
148     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
149     void representNodeOffsetZero();
150     bool handleTextNode();
151     bool handleReplacedElement();
152     bool handleNonTextNode();
153     void handleTextBox();
154     void handleTextNodeFirstLetter(RenderTextFragment*);
155     bool hasVisibleTextNode(RenderText*);
156     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
157     void emitText(Node* textNode, RenderText* renderer, int textStartOffset, int textEndOffset);
158 
159     // Current position, not necessarily of the text being returned, but position
160     // as we walk through the DOM tree.
161     RawPtrWillBeMember<Node> m_node;
162     int m_offset;
163     IterationProgress m_iterationProgress;
164     BitStack m_fullyClippedStack;
165     int m_shadowDepth;
166 
167     // The range.
168     RawPtrWillBeMember<Node> m_startContainer;
169     int m_startOffset;
170     RawPtrWillBeMember<Node> m_endContainer;
171     int m_endOffset;
172     RawPtrWillBeMember<Node> m_pastEndNode;
173 
174     // The current text and its position, in the form to be returned from the iterator.
175     RawPtrWillBeMember<Node> m_positionNode;
176     mutable RawPtrWillBeMember<Node> m_positionOffsetBaseNode;
177     mutable int m_positionStartOffset;
178     mutable int m_positionEndOffset;
179     int m_textLength;
180     String m_text;
181 
182     // Used when there is still some pending text from the current node; when these
183     // are false and 0, we go back to normal iterating.
184     bool m_needsAnotherNewline;
185     InlineTextBox* m_textBox;
186     // Used when iteration over :first-letter text to save pointer to
187     // remaining text box.
188     InlineTextBox* m_remainingTextBox;
189     // Used to point to RenderText object for :first-letter.
190     RawPtrWillBeMember<RenderText> m_firstLetterText;
191 
192     // Used to do the whitespace collapsing logic.
193     RawPtrWillBeMember<Text> m_lastTextNode;
194     bool m_lastTextNodeEndedWithCollapsedSpace;
195     UChar m_lastCharacter;
196 
197     // Used for whitespace characters that aren't in the DOM, so we can point at them.
198     // If non-zero, overrides m_text.
199     UChar m_singleCharacterBuffer;
200 
201     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
202     Vector<InlineTextBox*> m_sortedTextBoxes;
203     size_t m_sortedTextBoxesPosition;
204 
205     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
206     bool m_hasEmitted;
207 
208     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
209     // in the Range used to create the TextIterator.
210     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
211     // moveParagraphs to not clone/destroy moved content.
212     bool m_emitsCharactersBetweenAllVisiblePositions;
213     bool m_entersTextControls;
214 
215     // Used in pasting inside password field.
216     bool m_emitsOriginalText;
217     // Used when deciding text fragment created by :first-letter should be looked into.
218     bool m_handledFirstLetter;
219     // Used when the visibility of the style should not affect text gathering.
220     bool m_ignoresStyleVisibility;
221     // Used when the iteration should stop if form controls are reached.
222     bool m_stopsOnFormControls;
223     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
224     bool m_shouldStop;
225 
226     bool m_emitsImageAltText;
227 
228     bool m_entersAuthorShadowRoots;
229 
230     bool m_emitsObjectReplacementCharacter;
231 };
232 
// Iterates through the DOM range, returning all the text, and 0-length boundaries
// at points where replaced elements break up the text flow. The text comes back in
// chunks so as to optimize for performance of the iteration.
236 class SimplifiedBackwardsTextIterator {
237     STACK_ALLOCATED();
238 public:
239     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
240     SimplifiedBackwardsTextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
241 
atEnd()242     bool atEnd() const { return !m_positionNode || m_shouldStop; }
243     void advance();
244 
length()245     int length() const { return m_textLength; }
246 
node()247     Node* node() const { return m_node; }
248 
249     template<typename BufferType>
prependTextTo(BufferType & output)250     void prependTextTo(BufferType& output)
251     {
252         if (!m_textLength)
253             return;
254         if (m_singleCharacterBuffer)
255             output.prepend(&m_singleCharacterBuffer, 1);
256         else
257             m_textContainer.prependTo(output, m_textOffset, m_textLength);
258     }
259 
260     Node* startContainer() const;
261     int endOffset() const;
262     Position startPosition() const;
263     Position endPosition() const;
264 
265 private:
266     void init(Node* startNode, Node* endNode, int startOffset, int endOffset);
267     void exitNode();
268     bool handleTextNode();
269     RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
270     bool handleReplacedElement();
271     bool handleNonTextNode();
272     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
273     bool advanceRespectingRange(Node*);
274 
275     // Current position, not necessarily of the text being returned, but position
276     // as we walk through the DOM tree.
277     RawPtrWillBeMember<Node> m_node;
278     int m_offset;
279     bool m_handledNode;
280     bool m_handledChildren;
281     BitStack m_fullyClippedStack;
282 
283     // End of the range.
284     RawPtrWillBeMember<Node> m_startNode;
285     int m_startOffset;
286     // Start of the range.
287     RawPtrWillBeMember<Node> m_endNode;
288     int m_endOffset;
289 
290     // The current text and its position, in the form to be returned from the iterator.
291     RawPtrWillBeMember<Node> m_positionNode;
292     int m_positionStartOffset;
293     int m_positionEndOffset;
294 
295     String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer.
296     int m_textOffset;
297     int m_textLength;
298 
299     // Used to do the whitespace logic.
300     RawPtrWillBeMember<Text> m_lastTextNode;
301     UChar m_lastCharacter;
302 
303     // Used for whitespace characters that aren't in the DOM, so we can point at them.
304     UChar m_singleCharacterBuffer;
305 
306     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
307     bool m_havePassedStartNode;
308 
309     // Should handle first-letter renderer in the next call to handleTextNode.
310     bool m_shouldHandleFirstLetter;
311 
312     // Used when the iteration should stop if form controls are reached.
313     bool m_stopsOnFormControls;
314 
315     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
316     bool m_shouldStop;
317 
318     // Used in pasting inside password field.
319     bool m_emitsOriginalText;
320 };
321 
// Builds on the text iterator, adding a character position so we can walk one
// character at a time, or faster, as needed. Useful for searching.
324 class CharacterIterator {
325     STACK_ALLOCATED();
326 public:
327     explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
328     CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
329 
330     void advance(int numCharacters);
331 
atBreak()332     bool atBreak() const { return m_atBreak; }
atEnd()333     bool atEnd() const { return m_textIterator.atEnd(); }
334 
length()335     int length() const { return m_textIterator.length() - m_runOffset; }
characterAt(unsigned index)336     UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); }
337 
338     template<typename BufferType>
appendTextTo(BufferType & output)339     void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); }
340 
characterOffset()341     int characterOffset() const { return m_offset; }
342     PassRefPtrWillBeRawPtr<Range> createRange() const;
343 
344     Document* ownerDocument() const;
345     Node* startContainer() const;
346     Node* endContainer() const;
347     int startOffset() const;
348     int endOffset() const;
349     Position startPosition() const;
350     Position endPosition() const;
351 
352 private:
353     void initialize();
354 
355     int m_offset;
356     int m_runOffset;
357     bool m_atBreak;
358 
359     TextIterator m_textIterator;
360 };
361 
362 class BackwardsCharacterIterator {
363     STACK_ALLOCATED();
364 public:
365     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
366     BackwardsCharacterIterator(const Position&, const Position&, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
367 
368     void advance(int);
369 
atEnd()370     bool atEnd() const { return m_textIterator.atEnd(); }
371 
372     Position endPosition() const;
373 
374 private:
375     int m_offset;
376     int m_runOffset;
377     bool m_atBreak;
378 
379     SimplifiedBackwardsTextIterator m_textIterator;
380 };
381 
// Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
// meaning they never split up a word.  This is useful for spellcheck or (perhaps one day) searching.
384 class WordAwareIterator {
385     STACK_ALLOCATED();
386 public:
387     explicit WordAwareIterator(const Position& start, const Position& end);
388     ~WordAwareIterator();
389 
atEnd()390     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
391     void advance();
392 
393     String substring(unsigned position, unsigned length) const;
394     UChar characterAt(unsigned index) const;
395     int length() const;
396 
397 private:
398     Vector<UChar> m_buffer;
399     // Did we have to look ahead in the textIterator to confirm the current chunk?
400     bool m_didLookAhead;
401     TextIterator m_textIterator;
402 };
403 
404 }
405 
406 #endif
407