1 /* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef TextIterator_h 27 #define TextIterator_h 28 29 #include "core/dom/Range.h" 30 #include "core/editing/FindOptions.h" 31 #include "platform/heap/Handle.h" 32 #include "wtf/Vector.h" 33 34 namespace WebCore { 35 36 class InlineTextBox; 37 class RenderText; 38 class RenderTextFragment; 39 40 enum TextIteratorBehavior { 41 TextIteratorDefaultBehavior = 0, 42 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0, 43 TextIteratorEntersTextControls = 1 << 1, 44 TextIteratorIgnoresStyleVisibility = 1 << 2, 45 TextIteratorEmitsOriginalText = 1 << 3, 46 TextIteratorStopsOnFormControls = 1 << 4, 47 TextIteratorEmitsImageAltText = 1 << 5, 48 TextIteratorEntersAuthorShadowRoots = 1 << 6, 49 TextIteratorEmitsObjectReplacementCharacter = 1 << 7 50 }; 51 typedef unsigned TextIteratorBehaviorFlags; 52 53 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 54 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range*, const String&, FindOptions); 55 void findPlainText(const Position& inputStart, const Position& inputEnd, const String&, FindOptions, Position& resultStart, Position& resultEnd); 56 57 class BitStack { 58 public: 59 BitStack(); 60 ~BitStack(); 61 62 void push(bool); 63 void pop(); 64 65 bool top() const; 66 unsigned size() const; 67 68 private: 69 unsigned m_size; 70 Vector<unsigned, 1> m_words; 71 }; 72 73 // Iterates through the DOM range, returning all the text, and 0-length boundaries 74 // at points where replaced elements break up the text flow. The text comes back in 75 // chunks so as to optimize for performance of the iteration. 76 77 class TextIterator { 78 STACK_ALLOCATED(); 79 public: 80 explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 81 // [start, end] indicates the document range that the iteration should take place within (both ends inclusive). 82 TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 83 ~TextIterator(); 84 atEnd()85 bool atEnd() const { return !m_positionNode || m_shouldStop; } 86 void advance(); 87 length()88 int length() const { return m_textLength; } 89 UChar characterAt(unsigned index) const; 90 String substring(unsigned position, unsigned length) const; 91 void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const; 92 93 template<typename BufferType> 94 void appendTextTo(BufferType& output, unsigned position = 0) 95 { 96 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length())); 97 unsigned lengthToAppend = length() - position; 98 if (!lengthToAppend) 99 return; 100 if (m_singleCharacterBuffer) { 101 ASSERT(!position); 102 ASSERT(length() == 1); 103 output.append(&m_singleCharacterBuffer, 1); 104 } else { 105 string().appendTo(output, startOffset() + position, lengthToAppend); 106 } 107 } 108 109 PassRefPtrWillBeRawPtr<Range> range() const; 110 Node* node() const; 111 112 // Computes the length of the given range using a text iterator. The default 113 // iteration behavior is to always emit object replacement characters for 114 // replaced elements. When |forSelectionPreservation| is set to true, it 115 // also emits spaces for other non-text nodes using the 116 // |TextIteratorEmitsCharactersBetweenAllVisiblePosition| mode. 117 static int rangeLength(const Range*, bool forSelectionPreservation = false); 118 static PassRefPtrWillBeRawPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 119 120 private: 121 enum IterationProgress { 122 HandledNone, 123 HandledAuthorShadowRoots, 124 HandledUserAgentShadowRoot, 125 HandledNode, 126 HandledChildren 127 }; 128 129 void initialize(const Position& start, const Position& end); 130 startOffset()131 int startOffset() const { return m_positionStartOffset; } string()132 const String& string() const { return m_text; } 133 void exitNode(); 134 bool shouldRepresentNodeOffsetZero(); 135 bool shouldEmitSpaceBeforeAndAfterNode(Node*); 136 void representNodeOffsetZero(); 137 bool handleTextNode(); 138 bool handleReplacedElement(); 139 bool handleNonTextNode(); 140 void handleTextBox(); 141 void handleTextNodeFirstLetter(RenderTextFragment*); 142 bool hasVisibleTextNode(RenderText*); 143 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 144 void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset); 145 void emitText(Node* textNode, int textStartOffset, int textEndOffset); 146 147 // Current position, not necessarily of the text being returned, but position 148 // as we walk through the DOM tree. 149 RawPtrWillBeMember<Node> m_node; 150 int m_offset; 151 IterationProgress m_iterationProgress; 152 BitStack m_fullyClippedStack; 153 int m_shadowDepth; 154 155 // The range. 156 RawPtrWillBeMember<Node> m_startContainer; 157 int m_startOffset; 158 RawPtrWillBeMember<Node> m_endContainer; 159 int m_endOffset; 160 RawPtrWillBeMember<Node> m_pastEndNode; 161 162 // The current text and its position, in the form to be returned from the iterator. 163 RawPtrWillBeMember<Node> m_positionNode; 164 mutable RawPtrWillBeMember<Node> m_positionOffsetBaseNode; 165 mutable int m_positionStartOffset; 166 mutable int m_positionEndOffset; 167 int m_textLength; 168 String m_text; 169 170 // Used when there is still some pending text from the current node; when these 171 // are false and 0, we go back to normal iterating. 172 bool m_needsAnotherNewline; 173 InlineTextBox* m_textBox; 174 // Used when iteration over :first-letter text to save pointer to 175 // remaining text box. 176 InlineTextBox* m_remainingTextBox; 177 // Used to point to RenderText object for :first-letter. 178 RenderText *m_firstLetterText; 179 180 // Used to do the whitespace collapsing logic. 181 RawPtrWillBeMember<Node> m_lastTextNode; 182 bool m_lastTextNodeEndedWithCollapsedSpace; 183 UChar m_lastCharacter; 184 185 // Used for whitespace characters that aren't in the DOM, so we can point at them. 186 // If non-zero, overrides m_text. 187 UChar m_singleCharacterBuffer; 188 189 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 190 Vector<InlineTextBox*> m_sortedTextBoxes; 191 size_t m_sortedTextBoxesPosition; 192 193 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 194 bool m_hasEmitted; 195 196 // Used by selection preservation code. There should be one character emitted between every VisiblePosition 197 // in the Range used to create the TextIterator. 198 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 199 // moveParagraphs to not clone/destroy moved content. 200 bool m_emitsCharactersBetweenAllVisiblePositions; 201 bool m_entersTextControls; 202 203 // Used in pasting inside password field. 204 bool m_emitsOriginalText; 205 // Used when deciding text fragment created by :first-letter should be looked into. 206 bool m_handledFirstLetter; 207 // Used when the visibility of the style should not affect text gathering. 208 bool m_ignoresStyleVisibility; 209 // Used when the iteration should stop if form controls are reached. 210 bool m_stopsOnFormControls; 211 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 212 bool m_shouldStop; 213 214 bool m_emitsImageAltText; 215 216 bool m_entersAuthorShadowRoots; 217 218 bool m_emitsObjectReplacementCharacter; 219 }; 220 221 // Iterates through the DOM range, returning all the text, and 0-length boundaries 222 // at points where replaced elements break up the text flow. The text comes back in 223 // chunks so as to optimize for performance of the iteration. 224 class SimplifiedBackwardsTextIterator { 225 STACK_ALLOCATED(); 226 public: 227 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 228 atEnd()229 bool atEnd() const { return !m_positionNode || m_shouldStop; } 230 void advance(); 231 length()232 int length() const { return m_textLength; } 233 node()234 Node* node() const { return m_node; } 235 236 template<typename BufferType> prependTextTo(BufferType & output)237 void prependTextTo(BufferType& output) 238 { 239 if (!m_textLength) 240 return; 241 if (m_singleCharacterBuffer) 242 output.prepend(&m_singleCharacterBuffer, 1); 243 else 244 m_textContainer.prependTo(output, m_textOffset, m_textLength); 245 } 246 247 PassRefPtrWillBeRawPtr<Range> range() const; 248 249 private: 250 void exitNode(); 251 bool handleTextNode(); 252 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode); 253 bool handleReplacedElement(); 254 bool handleNonTextNode(); 255 void emitCharacter(UChar, Node*, int startOffset, int endOffset); 256 bool advanceRespectingRange(Node*); 257 258 // Current position, not necessarily of the text being returned, but position 259 // as we walk through the DOM tree. 260 RawPtrWillBeMember<Node> m_node; 261 int m_offset; 262 bool m_handledNode; 263 bool m_handledChildren; 264 BitStack m_fullyClippedStack; 265 266 // End of the range. 267 RawPtrWillBeMember<Node> m_startNode; 268 int m_startOffset; 269 // Start of the range. 270 RawPtrWillBeMember<Node> m_endNode; 271 int m_endOffset; 272 273 // The current text and its position, in the form to be returned from the iterator. 274 RawPtrWillBeMember<Node> m_positionNode; 275 int m_positionStartOffset; 276 int m_positionEndOffset; 277 278 String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer. 279 int m_textOffset; 280 int m_textLength; 281 282 // Used to do the whitespace logic. 283 RawPtrWillBeMember<Node> m_lastTextNode; 284 UChar m_lastCharacter; 285 286 // Used for whitespace characters that aren't in the DOM, so we can point at them. 287 UChar m_singleCharacterBuffer; 288 289 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode). 290 bool m_havePassedStartNode; 291 292 // Should handle first-letter renderer in the next call to handleTextNode. 293 bool m_shouldHandleFirstLetter; 294 295 // Used when the iteration should stop if form controls are reached. 296 bool m_stopsOnFormControls; 297 298 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 299 bool m_shouldStop; 300 301 // Used in pasting inside password field. 302 bool m_emitsOriginalText; 303 }; 304 305 // Builds on the text iterator, adding a character position so we can walk one 306 // character at a time, or faster, as needed. Useful for searching. 307 class CharacterIterator { 308 STACK_ALLOCATED(); 309 public: 310 explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 311 CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 312 313 void advance(int numCharacters); 314 atBreak()315 bool atBreak() const { return m_atBreak; } atEnd()316 bool atEnd() const { return m_textIterator.atEnd(); } 317 length()318 int length() const { return m_textIterator.length() - m_runOffset; } characterAt(unsigned index)319 UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); } 320 321 template<typename BufferType> appendTextTo(BufferType & output)322 void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); } 323 characterOffset()324 int characterOffset() const { return m_offset; } 325 PassRefPtrWillBeRawPtr<Range> range() const; 326 327 private: 328 void initialize(); 329 330 int m_offset; 331 int m_runOffset; 332 bool m_atBreak; 333 334 TextIterator m_textIterator; 335 }; 336 337 class BackwardsCharacterIterator { 338 STACK_ALLOCATED(); 339 public: 340 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 341 342 void advance(int); 343 atEnd()344 bool atEnd() const { return m_textIterator.atEnd(); } 345 346 PassRefPtrWillBeRawPtr<Range> range() const; 347 348 private: 349 int m_offset; 350 int m_runOffset; 351 bool m_atBreak; 352 353 SimplifiedBackwardsTextIterator m_textIterator; 354 }; 355 356 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", 357 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. 358 class WordAwareIterator { 359 STACK_ALLOCATED(); 360 public: 361 explicit WordAwareIterator(const Range*); 362 ~WordAwareIterator(); 363 atEnd()364 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } 365 void advance(); 366 367 String substring(unsigned position, unsigned length) const; 368 UChar characterAt(unsigned index) const; 369 int length() const; 370 371 private: 372 Vector<UChar> m_buffer; 373 // Did we have to look ahead in the textIterator to confirm the current chunk? 374 bool m_didLookAhead; 375 RefPtrWillBeMember<Range> m_range; 376 TextIterator m_textIterator; 377 }; 378 379 } 380 381 #endif 382