/*
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

26 #ifndef TextIterator_h
27 #define TextIterator_h
28 
29 #include "FindOptions.h"
30 #include "InlineTextBox.h"
31 #include "Range.h"
32 #include <wtf/Vector.h>
33 
34 namespace WebCore {
35 
36 class RenderText;
37 class RenderTextFragment;
38 
39 enum TextIteratorBehavior {
40     TextIteratorDefaultBehavior = 0,
41     TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
42     TextIteratorEntersTextControls = 1 << 1,
43     TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
44     TextIteratorIgnoresStyleVisibility = 1 << 3,
45     TextIteratorEmitsObjectReplacementCharacters = 1 << 4,
46 #if OS(ANDROID)
47     TextIteratorStopsOnFormControls = 1 << 6
48 #endif
49 };
50 
51 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
52 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)53 inline bool isCollapsibleWhitespace(UChar c)
54 {
55     switch (c) {
56         case ' ':
57         case '\n':
58             return true;
59         default:
60             return false;
61     }
62 }
63 
64 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
65 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
66 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
67 
68 class BitStack {
69 public:
70     BitStack();
71     ~BitStack();
72 
73     void push(bool);
74     void pop();
75 
76     bool top() const;
77     unsigned size() const;
78 
79 private:
80     unsigned m_size;
81     Vector<unsigned, 1> m_words;
82 };
83 
84 // Iterates through the DOM range, returning all the text, and 0-length boundaries
85 // at points where replaced elements break up the text flow.  The text comes back in
86 // chunks so as to optimize for performance of the iteration.
87 
88 class TextIterator {
89 public:
90     TextIterator();
91     ~TextIterator();
92     explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
93 
94     bool atEnd() const;
95     void advance();
96 
length()97     int length() const { return m_textLength; }
characters()98     const UChar* characters() const { return m_textCharacters; }
99 
100     PassRefPtr<Range> range() const;
101     Node* node() const;
102 
103     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
104     static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
105     static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length);
106     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
107 
108 private:
109     void exitNode();
110     bool shouldRepresentNodeOffsetZero();
111     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
112     void representNodeOffsetZero();
113     bool handleTextNode();
114     bool handleReplacedElement();
115     bool handleNonTextNode();
116     void handleTextBox();
117     void handleTextNodeFirstLetter(RenderTextFragment*);
118     bool hasVisibleTextNode(RenderText*);
119     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
120     void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
121     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
122 
123     // Current position, not necessarily of the text being returned, but position
124     // as we walk through the DOM tree.
125     Node* m_node;
126     int m_offset;
127     bool m_handledNode;
128     bool m_handledChildren;
129     BitStack m_fullyClippedStack;
130 
131     // The range.
132     Node* m_startContainer;
133     int m_startOffset;
134     Node* m_endContainer;
135     int m_endOffset;
136     Node* m_pastEndNode;
137 
138     // The current text and its position, in the form to be returned from the iterator.
139     Node* m_positionNode;
140     mutable Node* m_positionOffsetBaseNode;
141     mutable int m_positionStartOffset;
142     mutable int m_positionEndOffset;
143     const UChar* m_textCharacters;
144     int m_textLength;
145     // Hold string m_textCharacters points to so we ensure it won't be deleted.
146     String m_text;
147 
148     // Used when there is still some pending text from the current node; when these
149     // are false and 0, we go back to normal iterating.
150     bool m_needsAnotherNewline;
151     InlineTextBox* m_textBox;
152     // Used when iteration over :first-letter text to save pointer to
153     // remaining text box.
154     InlineTextBox* m_remainingTextBox;
155     // Used to point to RenderText object for :first-letter.
156     RenderText *m_firstLetterText;
157 
158     // Used to do the whitespace collapsing logic.
159     Node* m_lastTextNode;
160     bool m_lastTextNodeEndedWithCollapsedSpace;
161     UChar m_lastCharacter;
162 
163     // Used for whitespace characters that aren't in the DOM, so we can point at them.
164     UChar m_singleCharacterBuffer;
165 
166     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
167     Vector<InlineTextBox*> m_sortedTextBoxes;
168     size_t m_sortedTextBoxesPosition;
169 
170     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
171     bool m_hasEmitted;
172 
173     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
174     // in the Range used to create the TextIterator.
175     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
176     // moveParagraphs to not clone/destroy moved content.
177     bool m_emitsCharactersBetweenAllVisiblePositions;
178     bool m_entersTextControls;
179 
180     // Used when we want texts for copying, pasting, and transposing.
181     bool m_emitsTextWithoutTranscoding;
182     // Used when deciding text fragment created by :first-letter should be looked into.
183     bool m_handledFirstLetter;
184     // Used when the visibility of the style should not affect text gathering.
185     bool m_ignoresStyleVisibility;
186     // Used when emitting the special 0xFFFC character is required.
187     bool m_emitsObjectReplacementCharacters;
188 #if OS(ANDROID)
189     // Used when the iteration should stop if form controls are reached.
190     bool m_stopsOnFormControls;
191     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
192     bool m_shouldStop;
193 #endif
194 };
195 
196 // Iterates through the DOM range, returning all the text, and 0-length boundaries
197 // at points where replaced elements break up the text flow. The text comes back in
198 // chunks so as to optimize for performance of the iteration.
199 class SimplifiedBackwardsTextIterator {
200 public:
201     SimplifiedBackwardsTextIterator();
202     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
203 
204     bool atEnd() const;
205     void advance();
206 
length()207     int length() const { return m_textLength; }
characters()208     const UChar* characters() const { return m_textCharacters; }
209 
210     PassRefPtr<Range> range() const;
211 
212 private:
213     void exitNode();
214     bool handleTextNode();
215     bool handleReplacedElement();
216     bool handleNonTextNode();
217     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
218     bool advanceRespectingRange(Node*);
219 
220     TextIteratorBehavior m_behavior;
221     // Current position, not necessarily of the text being returned, but position
222     // as we walk through the DOM tree.
223     Node* m_node;
224     int m_offset;
225     bool m_handledNode;
226     bool m_handledChildren;
227     BitStack m_fullyClippedStack;
228 
229     // End of the range.
230     Node* m_startNode;
231     int m_startOffset;
232     // Start of the range.
233     Node* m_endNode;
234     int m_endOffset;
235 
236     // The current text and its position, in the form to be returned from the iterator.
237     Node* m_positionNode;
238     int m_positionStartOffset;
239     int m_positionEndOffset;
240     const UChar* m_textCharacters;
241     int m_textLength;
242 
243     // Used to do the whitespace logic.
244     Node* m_lastTextNode;
245     UChar m_lastCharacter;
246 
247     // Used for whitespace characters that aren't in the DOM, so we can point at them.
248     UChar m_singleCharacterBuffer;
249 
250     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
251     bool m_havePassedStartNode;
252 
253 #if OS(ANDROID)
254     // Used when the iteration should stop if form controls are reached.
255     bool m_stopsOnFormControls;
256     // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
257     bool m_shouldStop;
258 #endif
259 };
260 
261 // Builds on the text iterator, adding a character position so we can walk one
262 // character at a time, or faster, as needed. Useful for searching.
263 class CharacterIterator {
264 public:
265     CharacterIterator();
266     explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
267 
268     void advance(int numCharacters);
269 
atBreak()270     bool atBreak() const { return m_atBreak; }
atEnd()271     bool atEnd() const { return m_textIterator.atEnd(); }
272 
length()273     int length() const { return m_textIterator.length() - m_runOffset; }
characters()274     const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
275     String string(int numChars);
276 
characterOffset()277     int characterOffset() const { return m_offset; }
278     PassRefPtr<Range> range() const;
279 
280 private:
281     int m_offset;
282     int m_runOffset;
283     bool m_atBreak;
284 
285     TextIterator m_textIterator;
286 };
287 
288 class BackwardsCharacterIterator {
289 public:
290     BackwardsCharacterIterator();
291     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
292 
293     void advance(int);
294 
atEnd()295     bool atEnd() const { return m_textIterator.atEnd(); }
296 
297     PassRefPtr<Range> range() const;
298 
299 private:
300     TextIteratorBehavior m_behavior;
301     int m_offset;
302     int m_runOffset;
303     bool m_atBreak;
304 
305     SimplifiedBackwardsTextIterator m_textIterator;
306 };
307 
308 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
309 // meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
310 class WordAwareIterator {
311 public:
312     WordAwareIterator();
313     explicit WordAwareIterator(const Range*);
314     ~WordAwareIterator();
315 
atEnd()316     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
317     void advance();
318 
319     int length() const;
320     const UChar* characters() const;
321 
322     // Range of the text we're currently returning
range()323     PassRefPtr<Range> range() const { return m_range; }
324 
325 private:
326     // text from the previous chunk from the textIterator
327     const UChar* m_previousText;
328     int m_previousLength;
329 
330     // many chunks from textIterator concatenated
331     Vector<UChar> m_buffer;
332 
333     // Did we have to look ahead in the textIterator to confirm the current chunk?
334     bool m_didLookAhead;
335 
336     RefPtr<Range> m_range;
337 
338     TextIterator m_textIterator;
339 };
340 
341 }
342 
343 #endif
344