• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef TextIterator_h
27 #define TextIterator_h
28 
29 #include "FindOptions.h"
30 #include "InlineTextBox.h"
31 #include "Range.h"
32 #include <wtf/Vector.h>
33 
34 namespace WebCore {
35 
// Forward declarations: this header only uses these render classes through
// pointers, so their full definitions are not needed here.
class RenderText;
class RenderTextFragment;
38 
// Flags that select optional behavior for the text iterators declared in this
// header. Each non-default enumerator occupies its own bit, so values can be
// OR-ed together. Note that OR-ing two enumerators of an unscoped enum yields
// an int, which has to be cast back before it can be passed as a
// TextIteratorBehavior.
//
// NOTE(review): the per-flag effects are implemented in the .cpp; the flag
// names correspond to the identically named boolean members of TextIterator,
// whose comments describe when each one is used.
enum TextIteratorBehavior {
    // No optional behavior requested.
    TextIteratorDefaultBehavior = 0,
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    TextIteratorEntersTextControls = 1 << 1,
    TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
    TextIteratorIgnoresStyleVisibility = 1 << 3,
    TextIteratorEmitsObjectReplacementCharacters = 1 << 4
};
47 
48 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
49 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)50 inline bool isCollapsibleWhitespace(UChar c)
51 {
52     switch (c) {
53         case ' ':
54         case '\n':
55             return true;
56         default:
57             return false;
58     }
59 }
60 
61 String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior);
62 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString, TextIteratorBehavior = TextIteratorDefaultBehavior);
63 PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
64 
65 class BitStack {
66 public:
67     BitStack();
68     ~BitStack();
69 
70     void push(bool);
71     void pop();
72 
73     bool top() const;
74     unsigned size() const;
75 
76 private:
77     unsigned m_size;
78     Vector<unsigned, 1> m_words;
79 };
80 
81 // Iterates through the DOM range, returning all the text, and 0-length boundaries
82 // at points where replaced elements break up the text flow.  The text comes back in
83 // chunks so as to optimize for performance of the iteration.
84 
85 class TextIterator {
86 public:
87     TextIterator();
88     ~TextIterator();
89     explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
90 
atEnd()91     bool atEnd() const { return !m_positionNode; }
92     void advance();
93 
length()94     int length() const { return m_textLength; }
characters()95     const UChar* characters() const { return m_textCharacters; }
96 
97     PassRefPtr<Range> range() const;
98     Node* node() const;
99 
100     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
101     static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
102     static bool locationAndLengthFromRange(const Range*, size_t& location, size_t& length);
103     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
104 
105 private:
106     void exitNode();
107     bool shouldRepresentNodeOffsetZero();
108     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
109     void representNodeOffsetZero();
110     bool handleTextNode();
111     bool handleReplacedElement();
112     bool handleNonTextNode();
113     void handleTextBox();
114     void handleTextNodeFirstLetter(RenderTextFragment*);
115     bool hasVisibleTextNode(RenderText*);
116     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
117     void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
118     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
119 
120     // Current position, not necessarily of the text being returned, but position
121     // as we walk through the DOM tree.
122     Node* m_node;
123     int m_offset;
124     bool m_handledNode;
125     bool m_handledChildren;
126     BitStack m_fullyClippedStack;
127 
128     // The range.
129     Node* m_startContainer;
130     int m_startOffset;
131     Node* m_endContainer;
132     int m_endOffset;
133     Node* m_pastEndNode;
134 
135     // The current text and its position, in the form to be returned from the iterator.
136     Node* m_positionNode;
137     mutable Node* m_positionOffsetBaseNode;
138     mutable int m_positionStartOffset;
139     mutable int m_positionEndOffset;
140     const UChar* m_textCharacters;
141     int m_textLength;
142     // Hold string m_textCharacters points to so we ensure it won't be deleted.
143     String m_text;
144 
145     // Used when there is still some pending text from the current node; when these
146     // are false and 0, we go back to normal iterating.
147     bool m_needsAnotherNewline;
148     InlineTextBox* m_textBox;
149     // Used when iteration over :first-letter text to save pointer to
150     // remaining text box.
151     InlineTextBox* m_remainingTextBox;
152     // Used to point to RenderText object for :first-letter.
153     RenderText *m_firstLetterText;
154 
155     // Used to do the whitespace collapsing logic.
156     Node* m_lastTextNode;
157     bool m_lastTextNodeEndedWithCollapsedSpace;
158     UChar m_lastCharacter;
159 
160     // Used for whitespace characters that aren't in the DOM, so we can point at them.
161     UChar m_singleCharacterBuffer;
162 
163     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
164     Vector<InlineTextBox*> m_sortedTextBoxes;
165     size_t m_sortedTextBoxesPosition;
166 
167     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
168     bool m_hasEmitted;
169 
170     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
171     // in the Range used to create the TextIterator.
172     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
173     // moveParagraphs to not clone/destroy moved content.
174     bool m_emitsCharactersBetweenAllVisiblePositions;
175     bool m_entersTextControls;
176 
177     // Used when we want texts for copying, pasting, and transposing.
178     bool m_emitsTextWithoutTranscoding;
179     // Used when deciding text fragment created by :first-letter should be looked into.
180     bool m_handledFirstLetter;
181     // Used when the visibility of the style should not affect text gathering.
182     bool m_ignoresStyleVisibility;
183     // Used when emitting the special 0xFFFC character is required.
184     bool m_emitsObjectReplacementCharacters;
185 };
186 
187 // Iterates through the DOM range, returning all the text, and 0-length boundaries
188 // at points where replaced elements break up the text flow. The text comes back in
189 // chunks so as to optimize for performance of the iteration.
190 class SimplifiedBackwardsTextIterator {
191 public:
192     SimplifiedBackwardsTextIterator();
193     explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
194 
atEnd()195     bool atEnd() const { return !m_positionNode; }
196     void advance();
197 
length()198     int length() const { return m_textLength; }
characters()199     const UChar* characters() const { return m_textCharacters; }
200 
201     PassRefPtr<Range> range() const;
202 
203 private:
204     void exitNode();
205     bool handleTextNode();
206     bool handleReplacedElement();
207     bool handleNonTextNode();
208     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
209     bool advanceRespectingRange(Node*);
210 
211     TextIteratorBehavior m_behavior;
212     // Current position, not necessarily of the text being returned, but position
213     // as we walk through the DOM tree.
214     Node* m_node;
215     int m_offset;
216     bool m_handledNode;
217     bool m_handledChildren;
218     BitStack m_fullyClippedStack;
219 
220     // End of the range.
221     Node* m_startNode;
222     int m_startOffset;
223     // Start of the range.
224     Node* m_endNode;
225     int m_endOffset;
226 
227     // The current text and its position, in the form to be returned from the iterator.
228     Node* m_positionNode;
229     int m_positionStartOffset;
230     int m_positionEndOffset;
231     const UChar* m_textCharacters;
232     int m_textLength;
233 
234     // Used to do the whitespace logic.
235     Node* m_lastTextNode;
236     UChar m_lastCharacter;
237 
238     // Used for whitespace characters that aren't in the DOM, so we can point at them.
239     UChar m_singleCharacterBuffer;
240 
241     // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
242     bool m_havePassedStartNode;
243 };
244 
245 // Builds on the text iterator, adding a character position so we can walk one
246 // character at a time, or faster, as needed. Useful for searching.
247 class CharacterIterator {
248 public:
249     CharacterIterator();
250     explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
251 
252     void advance(int numCharacters);
253 
atBreak()254     bool atBreak() const { return m_atBreak; }
atEnd()255     bool atEnd() const { return m_textIterator.atEnd(); }
256 
length()257     int length() const { return m_textIterator.length() - m_runOffset; }
characters()258     const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
259     String string(int numChars);
260 
characterOffset()261     int characterOffset() const { return m_offset; }
262     PassRefPtr<Range> range() const;
263 
264 private:
265     int m_offset;
266     int m_runOffset;
267     bool m_atBreak;
268 
269     TextIterator m_textIterator;
270 };
271 
272 class BackwardsCharacterIterator {
273 public:
274     BackwardsCharacterIterator();
275     explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
276 
277     void advance(int);
278 
atEnd()279     bool atEnd() const { return m_textIterator.atEnd(); }
280 
281     PassRefPtr<Range> range() const;
282 
283 private:
284     TextIteratorBehavior m_behavior;
285     int m_offset;
286     int m_runOffset;
287     bool m_atBreak;
288 
289     SimplifiedBackwardsTextIterator m_textIterator;
290 };
291 
292 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
293 // meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
294 class WordAwareIterator {
295 public:
296     WordAwareIterator();
297     explicit WordAwareIterator(const Range*);
298     ~WordAwareIterator();
299 
atEnd()300     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
301     void advance();
302 
303     int length() const;
304     const UChar* characters() const;
305 
306     // Range of the text we're currently returning
range()307     PassRefPtr<Range> range() const { return m_range; }
308 
309 private:
310     // text from the previous chunk from the textIterator
311     const UChar* m_previousText;
312     int m_previousLength;
313 
314     // many chunks from textIterator concatenated
315     Vector<UChar> m_buffer;
316 
317     // Did we have to look ahead in the textIterator to confirm the current chunk?
318     bool m_didLookAhead;
319 
320     RefPtr<Range> m_range;
321 
322     TextIterator m_textIterator;
323 };
324 
325 }
326 
327 #endif
328