1 /*
2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #ifndef TextIterator_h
27 #define TextIterator_h
28
29 #include "InlineTextBox.h"
30 #include "Range.h"
31 #include <wtf/Vector.h>
32
33 namespace WebCore {
34
35 // FIXME: Can't really answer this question correctly without knowing the white-space mode.
36 // FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
isCollapsibleWhitespace(UChar c)37 inline bool isCollapsibleWhitespace(UChar c)
38 {
39 switch (c) {
40 case ' ':
41 case '\n':
42 return true;
43 default:
44 return false;
45 }
46 }
47
48 String plainText(const Range*);
49 UChar* plainTextToMallocAllocatedBuffer(const Range*, unsigned& bufferLength, bool isDisplayString);
50 PassRefPtr<Range> findPlainText(const Range*, const String&, bool forward, bool caseSensitive);
51
52 class BitStack {
53 public:
54 BitStack();
55
56 void push(bool);
57 void pop();
58
59 bool top() const;
60 unsigned size() const;
61
62 private:
63 unsigned m_size;
64 Vector<unsigned, 1> m_words;
65 };
66
67 // Iterates through the DOM range, returning all the text, and 0-length boundaries
68 // at points where replaced elements break up the text flow. The text comes back in
69 // chunks so as to optimize for performance of the iteration.
70
71 class TextIterator {
72 public:
73 TextIterator();
74 explicit TextIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
75
atEnd()76 bool atEnd() const { return !m_positionNode; }
77 void advance();
78
length()79 int length() const { return m_textLength; }
characters()80 const UChar* characters() const { return m_textCharacters; }
81
82 PassRefPtr<Range> range() const;
83 Node* node() const;
84
85 static int rangeLength(const Range*, bool spacesForReplacedElements = false);
86 static PassRefPtr<Range> rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
87 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
88
89 private:
90 void exitNode();
91 bool shouldRepresentNodeOffsetZero();
92 bool shouldEmitSpaceBeforeAndAfterNode(Node*);
93 void representNodeOffsetZero();
94 bool handleTextNode();
95 bool handleReplacedElement();
96 bool handleNonTextNode();
97 void handleTextBox();
98 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
99 void emitText(Node* textNode, int textStartOffset, int textEndOffset);
100
101 // Current position, not necessarily of the text being returned, but position
102 // as we walk through the DOM tree.
103 Node* m_node;
104 int m_offset;
105 bool m_handledNode;
106 bool m_handledChildren;
107 BitStack m_fullyClippedStack;
108
109 // The range.
110 Node* m_startContainer;
111 int m_startOffset;
112 Node* m_endContainer;
113 int m_endOffset;
114 Node* m_pastEndNode;
115
116 // The current text and its position, in the form to be returned from the iterator.
117 Node* m_positionNode;
118 mutable Node* m_positionOffsetBaseNode;
119 mutable int m_positionStartOffset;
120 mutable int m_positionEndOffset;
121 const UChar* m_textCharacters;
122 int m_textLength;
123
124 // Used when there is still some pending text from the current node; when these
125 // are false and 0, we go back to normal iterating.
126 bool m_needAnotherNewline;
127 InlineTextBox* m_textBox;
128
129 // Used to do the whitespace collapsing logic.
130 Node* m_lastTextNode;
131 bool m_lastTextNodeEndedWithCollapsedSpace;
132 UChar m_lastCharacter;
133
134 // Used for whitespace characters that aren't in the DOM, so we can point at them.
135 UChar m_singleCharacterBuffer;
136
137 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
138 Vector<InlineTextBox*> m_sortedTextBoxes;
139 size_t m_sortedTextBoxesPosition;
140
141 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
142 bool m_haveEmitted;
143
144 // Used by selection preservation code. There should be one character emitted between every VisiblePosition
145 // in the Range used to create the TextIterator.
146 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
147 // moveParagraphs to not clone/destroy moved content.
148 bool m_emitCharactersBetweenAllVisiblePositions;
149 bool m_enterTextControls;
150 };
151
152 // Iterates through the DOM range, returning all the text, and 0-length boundaries
153 // at points where replaced elements break up the text flow. The text comes back in
154 // chunks so as to optimize for performance of the iteration.
155 class SimplifiedBackwardsTextIterator {
156 public:
157 SimplifiedBackwardsTextIterator();
158 explicit SimplifiedBackwardsTextIterator(const Range*);
159
atEnd()160 bool atEnd() const { return !m_positionNode; }
161 void advance();
162
length()163 int length() const { return m_textLength; }
characters()164 const UChar* characters() const { return m_textCharacters; }
165
166 PassRefPtr<Range> range() const;
167
168 private:
169 void exitNode();
170 bool handleTextNode();
171 bool handleReplacedElement();
172 bool handleNonTextNode();
173 void emitCharacter(UChar, Node*, int startOffset, int endOffset);
174
175 // Current position, not necessarily of the text being returned, but position
176 // as we walk through the DOM tree.
177 Node* m_node;
178 int m_offset;
179 bool m_handledNode;
180 bool m_handledChildren;
181 BitStack m_fullyClippedStack;
182
183 // End of the range.
184 Node* m_startNode;
185 int m_startOffset;
186 // Start of the range.
187 Node* m_endNode;
188 int m_endOffset;
189
190 // The current text and its position, in the form to be returned from the iterator.
191 Node* m_positionNode;
192 int m_positionStartOffset;
193 int m_positionEndOffset;
194 const UChar* m_textCharacters;
195 int m_textLength;
196
197 // Used to do the whitespace logic.
198 Node* m_lastTextNode;
199 UChar m_lastCharacter;
200
201 // Used for whitespace characters that aren't in the DOM, so we can point at them.
202 UChar m_singleCharacterBuffer;
203
204 // The node after the last node this iterator should process.
205 Node* m_pastStartNode;
206 };
207
208 // Builds on the text iterator, adding a character position so we can walk one
209 // character at a time, or faster, as needed. Useful for searching.
210 class CharacterIterator {
211 public:
212 CharacterIterator();
213 explicit CharacterIterator(const Range*, bool emitCharactersBetweenAllVisiblePositions = false, bool enterTextControls = false);
214
215 void advance(int numCharacters);
216
atBreak()217 bool atBreak() const { return m_atBreak; }
atEnd()218 bool atEnd() const { return m_textIterator.atEnd(); }
219
length()220 int length() const { return m_textIterator.length() - m_runOffset; }
characters()221 const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
222 String string(int numChars);
223
characterOffset()224 int characterOffset() const { return m_offset; }
225 PassRefPtr<Range> range() const;
226
227 private:
228 int m_offset;
229 int m_runOffset;
230 bool m_atBreak;
231
232 TextIterator m_textIterator;
233 };
234
235 class BackwardsCharacterIterator {
236 public:
237 BackwardsCharacterIterator();
238 explicit BackwardsCharacterIterator(const Range*);
239
240 void advance(int);
241
atEnd()242 bool atEnd() const { return m_textIterator.atEnd(); }
243
244 PassRefPtr<Range> range() const;
245
246 private:
247 int m_offset;
248 int m_runOffset;
249 bool m_atBreak;
250
251 SimplifiedBackwardsTextIterator m_textIterator;
252 };
253
254 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
255 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching.
256 class WordAwareIterator {
257 public:
258 WordAwareIterator();
259 explicit WordAwareIterator(const Range*);
260
atEnd()261 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
262 void advance();
263
264 int length() const;
265 const UChar* characters() const;
266
267 // Range of the text we're currently returning
range()268 PassRefPtr<Range> range() const { return m_range; }
269
270 private:
271 // text from the previous chunk from the textIterator
272 const UChar* m_previousText;
273 int m_previousLength;
274
275 // many chunks from textIterator concatenated
276 Vector<UChar> m_buffer;
277
278 // Did we have to look ahead in the textIterator to confirm the current chunk?
279 bool m_didLookAhead;
280
281 RefPtr<Range> m_range;
282
283 TextIterator m_textIterator;
284 };
285
286 }
287
288 #endif
289