1 /* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef TextIterator_h 27 #define TextIterator_h 28 29 #include "core/dom/Range.h" 30 #include "core/editing/FindOptions.h" 31 #include "platform/heap/Handle.h" 32 #include "wtf/Vector.h" 33 34 namespace blink { 35 36 class InlineTextBox; 37 class RenderText; 38 class RenderTextFragment; 39 40 enum TextIteratorBehavior { 41 TextIteratorDefaultBehavior = 0, 42 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0, 43 TextIteratorEntersTextControls = 1 << 1, 44 TextIteratorIgnoresStyleVisibility = 1 << 2, 45 TextIteratorEmitsOriginalText = 1 << 3, 46 TextIteratorStopsOnFormControls = 1 << 4, 47 TextIteratorEmitsImageAltText = 1 << 5, 48 TextIteratorEntersAuthorShadowRoots = 1 << 6, 49 TextIteratorEmitsObjectReplacementCharacter = 1 << 7 50 }; 51 typedef unsigned TextIteratorBehaviorFlags; 52 53 String plainText(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 54 String plainText(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 55 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range*, const String&, FindOptions); 56 void findPlainText(const Position& inputStart, const Position& inputEnd, const String&, FindOptions, Position& resultStart, Position& resultEnd); 57 58 class BitStack { 59 public: 60 BitStack(); 61 ~BitStack(); 62 63 void push(bool); 64 void pop(); 65 66 bool top() const; 67 unsigned size() const; 68 69 private: 70 unsigned m_size; 71 Vector<unsigned, 1> m_words; 72 }; 73 74 // Iterates through the DOM range, returning all the text, and 0-length boundaries 75 // at points where replaced elements break up the text flow. The text comes back in 76 // chunks so as to optimize for performance of the iteration. 77 78 class TextIterator { 79 STACK_ALLOCATED(); 80 public: 81 explicit TextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 82 // [start, end] indicates the document range that the iteration should take place within (both ends inclusive). 83 TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 84 ~TextIterator(); 85 atEnd()86 bool atEnd() const { return !m_positionNode || m_shouldStop; } 87 void advance(); 88 bool isInsideReplacedElement() const; 89 length()90 int length() const { return m_textLength; } 91 UChar characterAt(unsigned index) const; 92 String substring(unsigned position, unsigned length) const; 93 void appendTextToStringBuilder(StringBuilder&, unsigned position = 0, unsigned maxLength = UINT_MAX) const; 94 95 template<typename BufferType> 96 void appendTextTo(BufferType& output, unsigned position = 0) 97 { 98 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(length())); 99 unsigned lengthToAppend = length() - position; 100 if (!lengthToAppend) 101 return; 102 if (m_singleCharacterBuffer) { 103 ASSERT(!position); 104 ASSERT(length() == 1); 105 output.append(&m_singleCharacterBuffer, 1); 106 } else { 107 string().appendTo(output, startOffset() + position, lengthToAppend); 108 } 109 } 110 111 PassRefPtrWillBeRawPtr<Range> createRange() const; 112 Node* node() const; 113 114 Document* ownerDocument() const; 115 Node* startContainer() const; 116 Node* endContainer() const; 117 int startOffset() const; 118 int endOffset() const; 119 Position startPosition() const; 120 Position endPosition() const; 121 122 // Computes the length of the given range using a text iterator. The default 123 // iteration behavior is to always emit object replacement characters for 124 // replaced elements. When |forSelectionPreservation| is set to true, it 125 // also emits spaces for other non-text nodes using the 126 // |TextIteratorEmitsCharactersBetweenAllVisiblePosition| mode. 127 static int rangeLength(const Range*, bool forSelectionPreservation = false); 128 static int rangeLength(const Position& start, const Position& end, bool forSelectionPreservation = false); 129 static PassRefPtrWillBeRawPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 130 static void subrange(Position& start, Position& end, int characterOffset, int characterCount); 131 132 private: 133 enum IterationProgress { 134 HandledNone, 135 HandledAuthorShadowRoots, 136 HandledUserAgentShadowRoot, 137 HandledNode, 138 HandledChildren 139 }; 140 141 void initialize(const Position& start, const Position& end); 142 143 void flushPositionOffsets() const; positionStartOffset()144 int positionStartOffset() const { return m_positionStartOffset; } string()145 const String& string() const { return m_text; } 146 void exitNode(); 147 bool shouldRepresentNodeOffsetZero(); 148 bool shouldEmitSpaceBeforeAndAfterNode(Node*); 149 void representNodeOffsetZero(); 150 bool handleTextNode(); 151 bool handleReplacedElement(); 152 bool handleNonTextNode(); 153 void handleTextBox(); 154 void handleTextNodeFirstLetter(RenderTextFragment*); 155 bool hasVisibleTextNode(RenderText*); 156 void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 157 void emitText(Node* textNode, RenderText* renderer, int textStartOffset, int textEndOffset); 158 159 // Current position, not necessarily of the text being returned, but position 160 // as we walk through the DOM tree. 161 RawPtrWillBeMember<Node> m_node; 162 int m_offset; 163 IterationProgress m_iterationProgress; 164 BitStack m_fullyClippedStack; 165 int m_shadowDepth; 166 167 // The range. 168 RawPtrWillBeMember<Node> m_startContainer; 169 int m_startOffset; 170 RawPtrWillBeMember<Node> m_endContainer; 171 int m_endOffset; 172 RawPtrWillBeMember<Node> m_pastEndNode; 173 174 // The current text and its position, in the form to be returned from the iterator. 175 RawPtrWillBeMember<Node> m_positionNode; 176 mutable RawPtrWillBeMember<Node> m_positionOffsetBaseNode; 177 mutable int m_positionStartOffset; 178 mutable int m_positionEndOffset; 179 int m_textLength; 180 String m_text; 181 182 // Used when there is still some pending text from the current node; when these 183 // are false and 0, we go back to normal iterating. 184 bool m_needsAnotherNewline; 185 InlineTextBox* m_textBox; 186 // Used when iteration over :first-letter text to save pointer to 187 // remaining text box. 188 InlineTextBox* m_remainingTextBox; 189 // Used to point to RenderText object for :first-letter. 190 RawPtrWillBeMember<RenderText> m_firstLetterText; 191 192 // Used to do the whitespace collapsing logic. 193 RawPtrWillBeMember<Text> m_lastTextNode; 194 bool m_lastTextNodeEndedWithCollapsedSpace; 195 UChar m_lastCharacter; 196 197 // Used for whitespace characters that aren't in the DOM, so we can point at them. 198 // If non-zero, overrides m_text. 199 UChar m_singleCharacterBuffer; 200 201 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 202 Vector<InlineTextBox*> m_sortedTextBoxes; 203 size_t m_sortedTextBoxesPosition; 204 205 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 206 bool m_hasEmitted; 207 208 // Used by selection preservation code. There should be one character emitted between every VisiblePosition 209 // in the Range used to create the TextIterator. 210 // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite 211 // moveParagraphs to not clone/destroy moved content. 212 bool m_emitsCharactersBetweenAllVisiblePositions; 213 bool m_entersTextControls; 214 215 // Used in pasting inside password field. 216 bool m_emitsOriginalText; 217 // Used when deciding text fragment created by :first-letter should be looked into. 218 bool m_handledFirstLetter; 219 // Used when the visibility of the style should not affect text gathering. 220 bool m_ignoresStyleVisibility; 221 // Used when the iteration should stop if form controls are reached. 222 bool m_stopsOnFormControls; 223 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 224 bool m_shouldStop; 225 226 bool m_emitsImageAltText; 227 228 bool m_entersAuthorShadowRoots; 229 230 bool m_emitsObjectReplacementCharacter; 231 }; 232 233 // Iterates through the DOM range, returning all the text, and 0-length boundaries 234 // at points where replaced elements break up the text flow. The text comes back in 235 // chunks so as to optimize for performance of the iteration. 236 class SimplifiedBackwardsTextIterator { 237 STACK_ALLOCATED(); 238 public: 239 explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 240 SimplifiedBackwardsTextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 241 atEnd()242 bool atEnd() const { return !m_positionNode || m_shouldStop; } 243 void advance(); 244 length()245 int length() const { return m_textLength; } 246 node()247 Node* node() const { return m_node; } 248 249 template<typename BufferType> prependTextTo(BufferType & output)250 void prependTextTo(BufferType& output) 251 { 252 if (!m_textLength) 253 return; 254 if (m_singleCharacterBuffer) 255 output.prepend(&m_singleCharacterBuffer, 1); 256 else 257 m_textContainer.prependTo(output, m_textOffset, m_textLength); 258 } 259 260 Node* startContainer() const; 261 int endOffset() const; 262 Position startPosition() const; 263 Position endPosition() const; 264 265 private: 266 void init(Node* startNode, Node* endNode, int startOffset, int endOffset); 267 void exitNode(); 268 bool handleTextNode(); 269 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode); 270 bool handleReplacedElement(); 271 bool handleNonTextNode(); 272 void emitCharacter(UChar, Node*, int startOffset, int endOffset); 273 bool advanceRespectingRange(Node*); 274 275 // Current position, not necessarily of the text being returned, but position 276 // as we walk through the DOM tree. 277 RawPtrWillBeMember<Node> m_node; 278 int m_offset; 279 bool m_handledNode; 280 bool m_handledChildren; 281 BitStack m_fullyClippedStack; 282 283 // End of the range. 284 RawPtrWillBeMember<Node> m_startNode; 285 int m_startOffset; 286 // Start of the range. 287 RawPtrWillBeMember<Node> m_endNode; 288 int m_endOffset; 289 290 // The current text and its position, in the form to be returned from the iterator. 291 RawPtrWillBeMember<Node> m_positionNode; 292 int m_positionStartOffset; 293 int m_positionEndOffset; 294 295 String m_textContainer; // We're interested in the range [m_textOffset, m_textOffset + m_textLength) of m_textContainer. 296 int m_textOffset; 297 int m_textLength; 298 299 // Used to do the whitespace logic. 300 RawPtrWillBeMember<Text> m_lastTextNode; 301 UChar m_lastCharacter; 302 303 // Used for whitespace characters that aren't in the DOM, so we can point at them. 304 UChar m_singleCharacterBuffer; 305 306 // Whether m_node has advanced beyond the iteration range (i.e. m_startNode). 307 bool m_havePassedStartNode; 308 309 // Should handle first-letter renderer in the next call to handleTextNode. 310 bool m_shouldHandleFirstLetter; 311 312 // Used when the iteration should stop if form controls are reached. 313 bool m_stopsOnFormControls; 314 315 // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing. 316 bool m_shouldStop; 317 318 // Used in pasting inside password field. 319 bool m_emitsOriginalText; 320 }; 321 322 // Builds on the text iterator, adding a character position so we can walk one 323 // character at a time, or faster, as needed. Useful for searching. 324 class CharacterIterator { 325 STACK_ALLOCATED(); 326 public: 327 explicit CharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 328 CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 329 330 void advance(int numCharacters); 331 atBreak()332 bool atBreak() const { return m_atBreak; } atEnd()333 bool atEnd() const { return m_textIterator.atEnd(); } 334 length()335 int length() const { return m_textIterator.length() - m_runOffset; } characterAt(unsigned index)336 UChar characterAt(unsigned index) const { return m_textIterator.characterAt(m_runOffset + index); } 337 338 template<typename BufferType> appendTextTo(BufferType & output)339 void appendTextTo(BufferType& output) { m_textIterator.appendTextTo(output, m_runOffset); } 340 characterOffset()341 int characterOffset() const { return m_offset; } 342 PassRefPtrWillBeRawPtr<Range> createRange() const; 343 344 Document* ownerDocument() const; 345 Node* startContainer() const; 346 Node* endContainer() const; 347 int startOffset() const; 348 int endOffset() const; 349 Position startPosition() const; 350 Position endPosition() const; 351 352 private: 353 void initialize(); 354 355 int m_offset; 356 int m_runOffset; 357 bool m_atBreak; 358 359 TextIterator m_textIterator; 360 }; 361 362 class BackwardsCharacterIterator { 363 STACK_ALLOCATED(); 364 public: 365 explicit BackwardsCharacterIterator(const Range*, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 366 BackwardsCharacterIterator(const Position&, const Position&, TextIteratorBehaviorFlags = TextIteratorDefaultBehavior); 367 368 void advance(int); 369 atEnd()370 bool atEnd() const { return m_textIterator.atEnd(); } 371 372 Position endPosition() const; 373 374 private: 375 int m_offset; 376 int m_runOffset; 377 bool m_atBreak; 378 379 SimplifiedBackwardsTextIterator m_textIterator; 380 }; 381 382 // Very similar to the TextIterator, except that the chunks of text returned are "well behaved", 383 // meaning they never end split up a word. This is useful for spellcheck or (perhaps one day) searching. 384 class WordAwareIterator { 385 STACK_ALLOCATED(); 386 public: 387 explicit WordAwareIterator(const Position& start, const Position& end); 388 ~WordAwareIterator(); 389 atEnd()390 bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); } 391 void advance(); 392 393 String substring(unsigned position, unsigned length) const; 394 UChar characterAt(unsigned index) const; 395 int length() const; 396 397 private: 398 Vector<UChar> m_buffer; 399 // Did we have to look ahead in the textIterator to confirm the current chunk? 400 bool m_didLookAhead; 401 TextIterator m_textIterator; 402 }; 403 404 } 405 406 #endif 407