/*
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25 
26 #ifndef TextIterator_h
27 #define TextIterator_h
28 
29 #include "core/dom/Range.h"
30 #include "core/editing/FindOptions.h"
31 #include "wtf/Vector.h"
32 
33 namespace WebCore {
34 
35 class InlineTextBox;
36 class RenderText;
37 class RenderTextFragment;
38 
// Bit flags that tune how the text iterators walk a range and what they emit.
// Each enumerator occupies its own bit so that several can be combined with
// bitwise OR into a TextIteratorBehaviorFlags value.
enum TextIteratorBehavior {
    // No flags set.
    TextIteratorDefaultBehavior = 0,
    // Emit one character between every VisiblePosition in the range; used by
    // selection preservation code (see the comment on
    // TextIterator::m_emitsCharactersBetweenAllVisiblePositions).
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    // NOTE(review): presumably lets the iterator descend into text form
    // controls - confirm against the implementation.
    TextIteratorEntersTextControls = 1 << 1,
    // The visibility of the style should not affect text gathering.
    TextIteratorIgnoresStyleVisibility = 1 << 2,
    // Used when pasting inside a password field (see TextIterator::m_emitsOriginalText).
    TextIteratorEmitsOriginalText = 1 << 3,
    // Iteration should stop if form controls are reached.
    TextIteratorStopsOnFormControls = 1 << 4,
    // NOTE(review): presumably emits the alt text of images - confirm against
    // the implementation.
    TextIteratorEmitsImageAltText = 1 << 5,
    // NOTE(review): presumably lets the iterator descend into author shadow
    // roots - confirm against the implementation.
    TextIteratorEntersAuthorShadowRoots = 1 << 6
};
// Bitwise OR of zero or more TextIteratorBehavior values.
typedef unsigned TextIteratorBehaviorFlags;
50 
51 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)53 inline bool isCollapsibleWhitespace(UChar c)
54 {
55     switch (c) {
56         case ' ':
57         case '\n':
58             return true;
59         default:
60             return false;
61     }
62 }
63 
64 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
65 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
66 
67 class BitStack {
68 public:
69     BitStack();
70     ~BitStack();
71 
72     void push(bool);
73     void pop();
74 
75     bool top() const;
76     unsigned size() const;
77 
78 private:
79     unsigned m_size;
80     Vector<unsigned, 1> m_words;
81 };
82 
83 // Iterates through the DOM range, returning all the text, and 0-length boundaries
84 // at points where replaced elements break up the text flow.  The text comes back in
85 // chunks so as to optimize for performance of the iteration.
86 
87 class TextIterator {
88 public:
89     explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
90     ~TextIterator();
91 
atEnd()92     bool atEnd() const { return !m_positionNode || m_shouldStop; }
93     void advance();
94 
length()95     int length() const { return m_textLength; }
96     UChar characterAt(unsigned index) const;
97     String substring(unsigned position, unsigned length) const;
98     void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const;
99 
100     template<typename BufferType>
101     void appendTextTo(BufferType& output, unsigned position = 0)
102     {
103         ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length()));
104         unsigned lengthToAppend = length() - position;
105         if (!lengthToAppend)
106             return;
107         if (m_singleCharacterBuffer) {
108             ASSERT(!position);
109             ASSERT(length() == 1);
110             output.append(&m_singleCharacterBuffer, 1);
111         } else {
112             string().appendTo(output, startOffset() + position, lengthToAppend);
113         }
114     }
115 
116     PassRefPtr<Range> range() const;
117     Node* node() const;
118 
119     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
120     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
121 
122 private:
123     enum IterationProgress {
124         HandledNone,
125         HandledAuthorShadowRoots,
126         HandledUserAgentShadowRoot,
127         HandledNode,
128         HandledChildren
129     };
130 
startOffset()131     int startOffset() const { return m_positionStartOffset; }
string()132     const String& string() const { return m_text; }
133     void exitNode();
134     bool shouldRepresentNodeOffsetZero();
135     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
136     void representNodeOffsetZero();
137     bool handleTextNode();
138     bool handleReplacedElement();
139     bool handleNonTextNode();
140     void handleTextBox();
141     void handleTextNodeFirstLetter(RenderTextFragment*);
142     bool hasVisibleTextNode(RenderText*);
143     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
144     void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
145     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
146 
147     // Current position, not necessarily of the text being returned, but position
148     // as we walk through the DOM tree.
149     Node* m_node;
150     int m_offset;
151     IterationProgress m_iterationProgress;
152     BitStack m_fullyClippedStack;
153     int m_shadowDepth;
154 
155     // The range.
156     Node* m_startContainer;
157     int m_startOffset;
158     Node* m_endContainer;
159     int m_endOffset;
160     Node* m_pastEndNode;
161 
162     // The current text and its position, in the form to be returned from the iterator.
163     Node* m_positionNode;
164     mutable Node* m_positionOffsetBaseNode;
165     mutable int m_positionStartOffset;
166     mutable int m_positionEndOffset;
167     int m_textLength;
168     String m_text;
169 
170     // Used when there is still some pending text from the current node; when these
171     // are false and 0, we go back to normal iterating.
172     bool m_needsAnotherNewline;
173     InlineTextBox* m_textBox;
174     // Used when iteration over :first-letter text to save pointer to
175     // remaining text box.
176     InlineTextBox* m_remainingTextBox;
177     // Used to point to RenderText object for :first-letter.
178     RenderText *m_firstLetterText;
179 
180     // Used to do the whitespace collapsing logic.
181     Node* m_lastTextNode;
182     bool m_lastTextNodeEndedWithCollapsedSpace;
183     UChar m_lastCharacter;
184 
185     // Used for whitespace characters that aren't in the DOM, so we can point at them.
186     // If non-zero, overrides m_text.
187     UChar m_singleCharacterBuffer;
188 
189     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
190     Vector<InlineTextBox*> m_sortedTextBoxes;
191     size_t m_sortedTextBoxesPosition;
192 
193     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
194     bool m_hasEmitted;
195 
196     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
197     // in the Range used to create the TextIterator.
198     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
199     // moveParagraphs to not clone/destroy moved content.
200     bool m_emitsCharactersBetweenAllVisiblePositions;
201     bool m_entersTextControls;
202 
203     // Used in pasting inside password field.
204     bool m_emitsOriginalText;
205     // Used when deciding text fragment created by :first-letter should be looked into.
206     bool m_handledFirstLetter;
207     // Used when the visibility of the style should not affect text gathering.
208     bool m_ignoresStyleVisibility;
209     // Used when the iteration should stop if form controls are reached.
210     bool m_stopsOnFormControls;
211     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
212     bool m_shouldStop;
213 
214     bool m_emitsImageAltText;
215 
216     bool m_entersAuthorShadowRoots;
217 };
218 
219 // Iterates through the DOM range, returning all the text, and 0-length boundaries
220 // at points where replaced elements break up the text flow. The text comes back in
221 // chunks so as to optimize for performance of the iteration.
222 class SimplifiedBackwardsTextIterator {
223 public:
224     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
225 
atEnd()226     bool atEnd() const { return !m_positionNode || m_shouldStop; }
227     void advance();
228 
length()229     int length() const { return m_textLength; }
230 
node()231     Node* node() const { return m_node; }
232 
233     template<typename BufferType>
prependTextTo(BufferType & output)234     void prependTextTo(BufferType& output)
235     {
236         if (!m_textLength)
237             return;
238         if (m_singleCharacterBuffer)
239             output.prepend(&m_singleCharacterBuffer, 1);
240         else
241             m_textContainer.prependTo(output, m_textOffset, m_textLength);
242     }
243 
244     PassRefPtr<Range> range() const;
245 
246 private:
247     void exitNode();
248     bool handleTextNode();
249     RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
250     bool handleReplacedElement();
251     bool handleNonTextNode();
252     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
253     bool advanceRespectingRange(Node*);
254 
255     // Current position, not necessarily of the text being returned, but position
256     // as we walk through the DOM tree.
257     Node* m_node;
258     int m_offset;
259     bool m_handledNode;
260     bool m_handledChildren;
261     BitStack m_fullyClippedStack;
262 
263     // End of the range.
264     Node* m_startNode;
265     int m_startOffset;
266     // Start of the range.
267     Node* m_endNode;
268     int m_endOffset;
269 
270     // The current text and its position, in the form to be returned from the iterator.
271     Node* m_positionNode;
272     int m_positionStartOffset;
273     int m_positionEndOffset;
274 
275     String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer.
276     int m_textOffset;
277     int m_textLength;
278 
279     // Used to do the whitespace logic.
280     Node* m_lastTextNode;
281     UChar m_lastCharacter;
282 
283     // Used for whitespace characters that aren't in the DOM, so we can point at them.
284     UChar m_singleCharacterBuffer;
285 
286     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
287     bool m_havePassedStartNode;
288 
289     // Should handle first-letter renderer in the next call to handleTextNode.
290     bool m_shouldHandleFirstLetter;
291 
292     // Used when the iteration should stop if form controls are reached.
293     bool m_stopsOnFormControls;
294 
295     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
296     bool m_shouldStop;
297 
298     // Used in pasting inside password field.
299     bool m_emitsOriginalText;
300 };
301 
302 // Builds on the text iterator, adding a character position so we can walk one
303 // character at a time, or faster, as needed. Useful for searching.
304 class CharacterIterator {
305 public:
306     explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
307 
308     void advance(int numCharacters);
309 
atBreak()310     bool atBreak() const { return m_atBreak; }
atEnd()311     bool atEnd() const { return m_textIterator.atEnd(); }
312 
length()313     int length() const { return m_textIterator.length() - m_runOffset; }
characterAt(unsigned index)314     UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); }
315 
316     template<typename BufferType>
appendTextTo(BufferType & output)317     void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); }
318 
319     String string(int numChars);
320 
characterOffset()321     int characterOffset() const { return m_offset; }
322     PassRefPtr<Range> range() const;
323 
324 private:
325     int m_offset;
326     int m_runOffset;
327     bool m_atBreak;
328 
329     TextIterator m_textIterator;
330 };
331 
332 class BackwardsCharacterIterator {
333 public:
334     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior);
335 
336     void advance(int);
337 
atEnd()338     bool atEnd() const { return m_textIterator.atEnd(); }
339 
340     PassRefPtr<Range> range() const;
341 
342 private:
343     int m_offset;
344     int m_runOffset;
345     bool m_atBreak;
346 
347     SimplifiedBackwardsTextIterator m_textIterator;
348 };
349 
350 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
351 // meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
352 class WordAwareIterator {
353 public:
354     explicit WordAwareIterator(const Range*);
355     ~WordAwareIterator();
356 
atEnd()357     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
358     void advance();
359 
360     String substring(unsigned position, unsigned length) const;
361     UChar characterAt(unsigned index) const;
362     int length() const;
363 
364     // Range of the text we're currently returning
range()365     PassRefPtr<Range> range() const { return m_range; }
366 
367 private:
368     Vector<UChar> m_buffer;
369     // Did we have to look ahead in the textIterator to confirm the current chunk?
370     bool m_didLookAhead;
371     RefPtr<Range> m_range;
372     TextIterator m_textIterator;
373 };
374 
375 }
376 
377 #endif
378