/*
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25 
26 #ifndef TextIterator_h
27 #define TextIterator_h
28 
29 #include "InlineTextBox.h"
30 #include "Range.h"
31 #include <wtf/Vector.h>
32 
33 namespace WebCore {
34 
35 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)37 inline bool isCollapsibleWhitespace(UChar c)
38 {
39     switch (c) {
40         case ' ':
41         case '\n':
42             return true;
43         default:
44             return false;
45     }
46 }
47 
48 String plainText(const Range*);
49 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString);
50 PassRefPtr<Range> findPlainText(const Range*, const String&, bool forward, bool caseSensitive);
51 
52 class BitStack {
53 public:
54     BitStack();
55 
56     void push(bool);
57     void pop();
58 
59     bool top() const;
60     unsigned size() const;
61 
62 private:
63     unsigned m_size;
64     Vector<unsigned, 1> m_words;
65 };
66 
67 // Iterates through the DOM range, returning all the text, and 0-length boundaries
68 // at points where replaced elements break up the text flow.  The text comes back in
69 // chunks so as to optimize for performance of the iteration.
70 
71 class TextIterator {
72 public:
73     TextIterator();
74     explicit TextIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
75 
atEnd()76     bool atEnd() const { return !m_positionNode; }
77     void advance();
78 
length()79     int length() const { return m_textLength; }
characters()80     const UChar* characters() const { return m_textCharacters; }
81 
82     PassRefPtr<Range> range() const;
83     Node* node() const;
84 
85     static int rangeLength(const Range*, bool spacesForReplacedElements = false);
86     static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
87     static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
88 
89 private:
90     void exitNode();
91     bool shouldRepresentNodeOffsetZero();
92     bool shouldEmitSpaceBeforeAndAfterNode(Node*);
93     void representNodeOffsetZero();
94     bool handleTextNode();
95     bool handleReplacedElement();
96     bool handleNonTextNode();
97     void handleTextBox();
98     void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
99     void emitText(Node* textNode, int textStartOffset, int textEndOffset);
100 
101     // Current position, not necessarily of the text being returned, but position
102     // as we walk through the DOM tree.
103     Node* m_node;
104     int m_offset;
105     bool m_handledNode;
106     bool m_handledChildren;
107     BitStack m_fullyClippedStack;
108 
109     // The range.
110     Node* m_startContainer;
111     int m_startOffset;
112     Node* m_endContainer;
113     int m_endOffset;
114     Node* m_pastEndNode;
115 
116     // The current text and its position, in the form to be returned from the iterator.
117     Node* m_positionNode;
118     mutable Node* m_positionOffsetBaseNode;
119     mutable int m_positionStartOffset;
120     mutable int m_positionEndOffset;
121     const UChar* m_textCharacters;
122     int m_textLength;
123 
124     // Used when there is still some pending text from the current node; when these
125     // are false and 0, we go back to normal iterating.
126     bool m_needAnotherNewline;
127     InlineTextBox* m_textBox;
128 
129     // Used to do the whitespace collapsing logic.
130     Node* m_lastTextNode;
131     bool m_lastTextNodeEndedWithCollapsedSpace;
132     UChar m_lastCharacter;
133 
134     // Used for whitespace characters that aren't in the DOM, so we can point at them.
135     UChar m_singleCharacterBuffer;
136 
137     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
138     Vector<InlineTextBox*> m_sortedTextBoxes;
139     size_t m_sortedTextBoxesPosition;
140 
141     // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
142     bool m_haveEmitted;
143 
144     // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
145     // in the Range used to create the TextIterator.
146     // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
147     // moveParagraphs to not clone/destroy moved content.
148     bool m_emitCharactersBetweenAllVisiblePositions;
149     bool m_enterTextControls;
150 };
151 
152 // Iterates through the DOM range, returning all the text, and 0-length boundaries
153 // at points where replaced elements break up the text flow. The text comes back in
154 // chunks so as to optimize for performance of the iteration.
155 class SimplifiedBackwardsTextIterator {
156 public:
157     SimplifiedBackwardsTextIterator();
158     explicit SimplifiedBackwardsTextIterator(const Range*);
159 
atEnd()160     bool atEnd() const { return !m_positionNode; }
161     void advance();
162 
length()163     int length() const { return m_textLength; }
characters()164     const UChar* characters() const { return m_textCharacters; }
165 
166     PassRefPtr<Range> range() const;
167 
168 private:
169     void exitNode();
170     bool handleTextNode();
171     bool handleReplacedElement();
172     bool handleNonTextNode();
173     void emitCharacter(UChar, Node*, int startOffset, int endOffset);
174 
175     // Current position, not necessarily of the text being returned, but position
176     // as we walk through the DOM tree.
177     Node* m_node;
178     int m_offset;
179     bool m_handledNode;
180     bool m_handledChildren;
181     BitStack m_fullyClippedStack;
182 
183     // End of the range.
184     Node* m_startNode;
185     int m_startOffset;
186     // Start of the range.
187     Node* m_endNode;
188     int m_endOffset;
189 
190     // The current text and its position, in the form to be returned from the iterator.
191     Node* m_positionNode;
192     int m_positionStartOffset;
193     int m_positionEndOffset;
194     const UChar* m_textCharacters;
195     int m_textLength;
196 
197     // Used to do the whitespace logic.
198     Node* m_lastTextNode;
199     UChar m_lastCharacter;
200 
201     // Used for whitespace characters that aren't in the DOM, so we can point at them.
202     UChar m_singleCharacterBuffer;
203 
204     // The node after the last node this iterator should process.
205     Node* m_pastStartNode;
206 };
207 
208 // Builds on the text iterator, adding a character position so we can walk one
209 // character at a time, or faster, as needed. Useful for searching.
210 class CharacterIterator {
211 public:
212     CharacterIterator();
213     explicit CharacterIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
214 
215     void advance(int numCharacters);
216 
atBreak()217     bool atBreak() const { return m_atBreak; }
atEnd()218     bool atEnd() const { return m_textIterator.atEnd(); }
219 
length()220     int length() const { return m_textIterator.length() - m_runOffset; }
characters()221     const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
222     String string(int numChars);
223 
characterOffset()224     int characterOffset() const { return m_offset; }
225     PassRefPtr<Range> range() const;
226 
227 private:
228     int m_offset;
229     int m_runOffset;
230     bool m_atBreak;
231 
232     TextIterator m_textIterator;
233 };
234 
235 class BackwardsCharacterIterator {
236 public:
237     BackwardsCharacterIterator();
238     explicit BackwardsCharacterIterator(const Range*);
239 
240     void advance(int);
241 
atEnd()242     bool atEnd() const { return m_textIterator.atEnd(); }
243 
244     PassRefPtr<Range> range() const;
245 
246 private:
247     int m_offset;
248     int m_runOffset;
249     bool m_atBreak;
250 
251     SimplifiedBackwardsTextIterator m_textIterator;
252 };
253 
254 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
255 // meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
256 class WordAwareIterator {
257 public:
258     WordAwareIterator();
259     explicit WordAwareIterator(const Range*);
260 
atEnd()261     bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
262     void advance();
263 
264     int length() const;
265     const UChar* characters() const;
266 
267     // Range of the text we're currently returning
range()268     PassRefPtr<Range> range() const { return m_range; }
269 
270 private:
271     // text from the previous chunk from the textIterator
272     const UChar* m_previousText;
273     int m_previousLength;
274 
275     // many chunks from textIterator concatenated
276     Vector<UChar> m_buffer;
277 
278     // Did we have to look ahead in the textIterator to confirm the current chunk?
279     bool m_didLookAhead;
280 
281     RefPtr<Range> m_range;
282 
283     TextIterator m_textIterator;
284 };
285 
286 }
287 
288 #endif
289