• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
3  * Copyright (C) 2005 Alexey Proskuryakov.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "core/editing/TextIterator.h"
29 
30 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
31 #include "core/HTMLNames.h"
32 #include "core/dom/Document.h"
33 #include "core/dom/NodeTraversal.h"
34 #include "core/dom/shadow/ShadowRoot.h"
35 #include "core/editing/VisiblePosition.h"
36 #include "core/editing/VisibleUnits.h"
37 #include "core/editing/htmlediting.h"
38 #include "core/frame/FrameView.h"
39 #include "core/html/HTMLElement.h"
40 #include "core/html/HTMLTextFormControlElement.h"
41 #include "core/rendering/InlineTextBox.h"
42 #include "core/rendering/RenderImage.h"
43 #include "core/rendering/RenderTableCell.h"
44 #include "core/rendering/RenderTableRow.h"
45 #include "core/rendering/RenderTextControl.h"
46 #include "core/rendering/RenderTextFragment.h"
47 #include "platform/fonts/Character.h"
48 #include "platform/fonts/Font.h"
49 #include "platform/text/TextBoundaries.h"
50 #include "platform/text/TextBreakIteratorInternalICU.h"
51 #include "platform/text/UnicodeUtilities.h"
52 #include "wtf/text/CString.h"
53 #include "wtf/text/StringBuilder.h"
54 #include "wtf/unicode/CharacterNames.h"
55 #include <unicode/usearch.h>
56 #include <unicode/utf16.h>
57 
58 using namespace WTF::Unicode;
59 
60 namespace blink {
61 
62 using namespace HTMLNames;
63 
64 // Buffer that knows how to compare with a search target.
65 // Keeps enough of the previous text to be able to search in the future, but no more.
66 // Non-breaking spaces are always equal to normal spaces.
67 // Case folding is also done if the CaseInsensitive option is specified.
68 // Matches are further filtered if the AtWordStarts option is specified, although some
69 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
70 class SearchBuffer {
71     WTF_MAKE_NONCOPYABLE(SearchBuffer);
72 public:
73     SearchBuffer(const String& target, FindOptions);
74     ~SearchBuffer();
75 
76     // Returns number of characters appended; guaranteed to be in the range [1, length].
77     template<typename CharType>
78     void append(const CharType*, size_t length);
numberOfCharactersJustAppended() const79     size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; }
80 
81     bool needsMoreContext() const;
82     void prependContext(const UChar*, size_t length);
83     void reachedBreak();
84 
85     // Result is the size in characters of what was found.
86     // And <startOffset> is the number of characters back to the start of what was found.
87     size_t search(size_t& startOffset);
88     bool atBreak() const;
89 
90 private:
91     bool isBadMatch(const UChar*, size_t length) const;
92     bool isWordStartMatch(size_t start, size_t length) const;
93 
94     Vector<UChar> m_target;
95     FindOptions m_options;
96 
97     Vector<UChar> m_buffer;
98     size_t m_overlap;
99     size_t m_prefixLength;
100     size_t m_numberOfCharactersJustAppended;
101     bool m_atBreak;
102     bool m_needsMoreContext;
103 
104     bool m_targetRequiresKanaWorkaround;
105     Vector<UChar> m_normalizedTarget;
106     mutable Vector<UChar> m_normalizedMatch;
107 };
108 
109 // --------
110 
111 static const unsigned bitsInWord = sizeof(unsigned) * 8;
112 static const unsigned bitInWordMask = bitsInWord - 1;
113 
BitStack()114 BitStack::BitStack()
115     : m_size(0)
116 {
117 }
118 
~BitStack()119 BitStack::~BitStack()
120 {
121 }
122 
push(bool bit)123 void BitStack::push(bool bit)
124 {
125     unsigned index = m_size / bitsInWord;
126     unsigned shift = m_size & bitInWordMask;
127     if (!shift && index == m_words.size()) {
128         m_words.grow(index + 1);
129         m_words[index] = 0;
130     }
131     unsigned& word = m_words[index];
132     unsigned mask = 1U << shift;
133     if (bit)
134         word |= mask;
135     else
136         word &= ~mask;
137     ++m_size;
138 }
139 
pop()140 void BitStack::pop()
141 {
142     if (m_size)
143         --m_size;
144 }
145 
top() const146 bool BitStack::top() const
147 {
148     if (!m_size)
149         return false;
150     unsigned shift = (m_size - 1) & bitInWordMask;
151     return m_words.last() & (1U << shift);
152 }
153 
size() const154 unsigned BitStack::size() const
155 {
156     return m_size;
157 }
158 
159 // --------
160 
161 #if ENABLE(ASSERT)
162 
depthCrossingShadowBoundaries(Node * node)163 static unsigned depthCrossingShadowBoundaries(Node* node)
164 {
165     unsigned depth = 0;
166     for (ContainerNode* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
167         ++depth;
168     return depth;
169 }
170 
171 #endif
172 
173 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
nextInPreOrderCrossingShadowBoundaries(Node * rangeEndContainer,int rangeEndOffset)174 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
175 {
176     if (!rangeEndContainer)
177         return 0;
178     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
179         if (Node* next = NodeTraversal::childAt(*rangeEndContainer, rangeEndOffset))
180             return next;
181     }
182     for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
183         if (Node* next = node->nextSibling())
184             return next;
185     }
186     return 0;
187 }
188 
189 // --------
190 
fullyClipsContents(Node * node)191 static inline bool fullyClipsContents(Node* node)
192 {
193     RenderObject* renderer = node->renderer();
194     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
195         return false;
196     return toRenderBox(renderer)->size().isEmpty();
197 }
198 
ignoresContainerClip(Node * node)199 static inline bool ignoresContainerClip(Node* node)
200 {
201     RenderObject* renderer = node->renderer();
202     if (!renderer || renderer->isText())
203         return false;
204     return renderer->style()->hasOutOfFlowPosition();
205 }
206 
pushFullyClippedState(BitStack & stack,Node * node)207 static void pushFullyClippedState(BitStack& stack, Node* node)
208 {
209     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
210 
211     // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364>
212     // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if
213     // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches
214     // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics
215     // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox).
216     //
217     // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering
218     // primitives, not with DOM primitives.
219 
220     // Push true if this node full clips its contents, or if a parent already has fully
221     // clipped and this is not a node that ignores its container's clip.
222     stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
223 }
224 
setUpFullyClippedStack(BitStack & stack,Node * node)225 static void setUpFullyClippedStack(BitStack& stack, Node* node)
226 {
227     // Put the nodes in a vector so we can iterate in reverse order.
228     WillBeHeapVector<RawPtrWillBeMember<ContainerNode>, 100> ancestry;
229     for (ContainerNode* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
230         ancestry.append(parent);
231 
232     // Call pushFullyClippedState on each node starting with the earliest ancestor.
233     size_t size = ancestry.size();
234     for (size_t i = 0; i < size; ++i)
235         pushFullyClippedState(stack, ancestry[size - i - 1]);
236     pushFullyClippedState(stack, node);
237 
238     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
239 }
240 
241 // --------
242 
TextIterator(const Range * range,TextIteratorBehaviorFlags behavior)243 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior)
244     : m_startContainer(nullptr)
245     , m_startOffset(0)
246     , m_endContainer(nullptr)
247     , m_endOffset(0)
248     , m_positionNode(nullptr)
249     , m_textLength(0)
250     , m_needsAnotherNewline(false)
251     , m_textBox(0)
252     , m_remainingTextBox(0)
253     , m_firstLetterText(nullptr)
254     , m_lastTextNode(nullptr)
255     , m_lastTextNodeEndedWithCollapsedSpace(false)
256     , m_lastCharacter(0)
257     , m_sortedTextBoxesPosition(0)
258     , m_hasEmitted(false)
259     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
260     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
261     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
262     , m_handledFirstLetter(false)
263     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
264     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
265     , m_shouldStop(false)
266     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
267     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
268     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
269 {
270     if (range)
271         initialize(range->startPosition(), range->endPosition());
272 }
273 
TextIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)274 TextIterator::TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
275     : m_startContainer(nullptr)
276     , m_startOffset(0)
277     , m_endContainer(nullptr)
278     , m_endOffset(0)
279     , m_positionNode(nullptr)
280     , m_textLength(0)
281     , m_needsAnotherNewline(false)
282     , m_textBox(0)
283     , m_remainingTextBox(0)
284     , m_firstLetterText(nullptr)
285     , m_lastTextNode(nullptr)
286     , m_lastTextNodeEndedWithCollapsedSpace(false)
287     , m_lastCharacter(0)
288     , m_sortedTextBoxesPosition(0)
289     , m_hasEmitted(false)
290     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
291     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
292     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
293     , m_handledFirstLetter(false)
294     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
295     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
296     , m_shouldStop(false)
297     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
298     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
299     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
300 {
301     initialize(start, end);
302 }
303 
initialize(const Position & start,const Position & end)304 void TextIterator::initialize(const Position& start, const Position& end)
305 {
306     ASSERT(comparePositions(start, end) <= 0);
307 
308     // Get and validate |start| and |end|.
309     Node* startContainer = start.containerNode();
310     if (!startContainer)
311         return;
312     int startOffset = start.computeOffsetInContainerNode();
313     Node* endContainer = end.containerNode();
314     if (!endContainer)
315         return;
316     int endOffset = end.computeOffsetInContainerNode();
317 
318     // Remember the range - this does not change.
319     m_startContainer = startContainer;
320     m_startOffset = startOffset;
321     m_endContainer = endContainer;
322     m_endOffset = endOffset;
323 
324     // Figure out the initial value of m_shadowDepth: the depth of startContainer's tree scope from
325     // the common ancestor tree scope.
326     const TreeScope* commonAncestorTreeScope = startContainer->treeScope().commonAncestorTreeScope(endContainer->treeScope());
327     ASSERT(commonAncestorTreeScope);
328     m_shadowDepth = 0;
329     for (const TreeScope* treeScope = &startContainer->treeScope(); treeScope != commonAncestorTreeScope; treeScope = treeScope->parentTreeScope())
330         ++m_shadowDepth;
331 
332     // Set up the current node for processing.
333     if (startContainer->offsetInCharacters())
334         m_node = startContainer;
335     else if (Node* child = NodeTraversal::childAt(*startContainer, startOffset))
336         m_node = child;
337     else if (!startOffset)
338         m_node = startContainer;
339     else
340         m_node = NodeTraversal::nextSkippingChildren(*startContainer);
341 
342     if (!m_node)
343         return;
344 
345     m_node->document().updateLayoutIgnorePendingStylesheets();
346 
347     setUpFullyClippedStack(m_fullyClippedStack, m_node);
348     m_offset = m_node == m_startContainer ? m_startOffset : 0;
349     m_iterationProgress = HandledNone;
350 
351     // Calculate first out of bounds node.
352     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
353 
354     // Identify the first run.
355     advance();
356 }
357 
~TextIterator()358 TextIterator::~TextIterator()
359 {
360 }
361 
isInsideReplacedElement() const362 bool TextIterator::isInsideReplacedElement() const
363 {
364     if (atEnd() || length() != 1 || !m_node)
365         return false;
366 
367     RenderObject* renderer = m_node->renderer();
368     return renderer && renderer->isReplaced();
369 }
370 
advance()371 void TextIterator::advance()
372 {
373     if (m_shouldStop)
374         return;
375 
376     ASSERT(!m_node || !m_node->document().needsRenderTreeUpdate());
377 
378     // reset the run information
379     m_positionNode = nullptr;
380     m_textLength = 0;
381 
382     // handle remembered node that needed a newline after the text node's newline
383     if (m_needsAnotherNewline) {
384         // Emit the extra newline, and position it *inside* m_node, after m_node's
385         // contents, in case it's a block, in the same way that we position the first
386         // newline. The range for the emitted newline should start where the line
387         // break begins.
388         // FIXME: It would be cleaner if we emitted two newlines during the last
389         // iteration, instead of using m_needsAnotherNewline.
390         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
391         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
392         m_needsAnotherNewline = false;
393         return;
394     }
395 
396     if (!m_textBox && m_remainingTextBox) {
397         m_textBox = m_remainingTextBox;
398         m_remainingTextBox = 0;
399         m_firstLetterText = nullptr;
400         m_offset = 0;
401     }
402     // handle remembered text box
403     if (m_textBox) {
404         handleTextBox();
405         if (m_positionNode)
406             return;
407     }
408 
409     while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) {
410         if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node))
411             m_shouldStop = true;
412 
413         // if the range ends at offset 0 of an element, represent the
414         // position, but not the content, of that element e.g. if the
415         // node is a blockflow element, emit a newline that
416         // precedes the element
417         if (m_node == m_endContainer && !m_endOffset) {
418             representNodeOffsetZero();
419             m_node = nullptr;
420             return;
421         }
422 
423         RenderObject* renderer = m_node->renderer();
424         if (!renderer) {
425             if (m_node->isShadowRoot()) {
426                 // A shadow root doesn't have a renderer, but we want to visit children anyway.
427                 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress;
428             } else {
429                 m_iterationProgress = HandledChildren;
430             }
431         } else {
432             // Enter author shadow roots, from youngest, if any and if necessary.
433             if (m_iterationProgress < HandledAuthorShadowRoots) {
434                 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) {
435                     ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot();
436                     ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot);
437                     m_node = youngestShadowRoot;
438                     m_iterationProgress = HandledNone;
439                     ++m_shadowDepth;
440                     pushFullyClippedState(m_fullyClippedStack, m_node);
441                     continue;
442                 }
443 
444                 m_iterationProgress = HandledAuthorShadowRoots;
445             }
446 
447             // Enter user-agent shadow root, if necessary.
448             if (m_iterationProgress < HandledUserAgentShadowRoot) {
449                 if (m_entersTextControls && renderer->isTextControl()) {
450                     ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot();
451                     ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
452                     m_node = userAgentShadowRoot;
453                     m_iterationProgress = HandledNone;
454                     ++m_shadowDepth;
455                     pushFullyClippedState(m_fullyClippedStack, m_node);
456                     continue;
457                 }
458                 m_iterationProgress = HandledUserAgentShadowRoot;
459             }
460 
461             // Handle the current node according to its type.
462             if (m_iterationProgress < HandledNode) {
463                 bool handledNode = false;
464                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE?
465                     handledNode = handleTextNode();
466                 } else if (renderer && (renderer->isImage() || renderer->isWidget()
467                     || (m_node && m_node->isHTMLElement()
468                     && (isHTMLFormControlElement(toHTMLElement(*m_node))
469                     || isHTMLLegendElement(toHTMLElement(*m_node))
470                     || isHTMLMeterElement(toHTMLElement(*m_node))
471                     || isHTMLProgressElement(toHTMLElement(*m_node)))))) {
472                     handledNode = handleReplacedElement();
473                 } else {
474                     handledNode = handleNonTextNode();
475                 }
476                 if (handledNode)
477                     m_iterationProgress = HandledNode;
478                 if (m_positionNode)
479                     return;
480             }
481         }
482 
483         // Find a new current node to handle in depth-first manner,
484         // calling exitNode() as we come back thru a parent node.
485         //
486         // 1. Iterate over child nodes, if we haven't done yet.
487         Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0;
488         m_offset = 0;
489         if (!next) {
490             // 2. If we've already iterated children or they are not available, go to the next sibling node.
491             next = m_node->nextSibling();
492             if (!next) {
493                 // 3. If we are at the last child, go up the node tree until we find a next sibling.
494                 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode;
495                 ContainerNode* parentNode = m_node->parentNode();
496                 while (!next && parentNode) {
497                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
498                         return;
499                     bool haveRenderer = m_node->renderer();
500                     m_node = parentNode;
501                     m_fullyClippedStack.pop();
502                     parentNode = m_node->parentNode();
503                     if (haveRenderer)
504                         exitNode();
505                     if (m_positionNode) {
506                         m_iterationProgress = HandledChildren;
507                         return;
508                     }
509                     next = m_node->nextSibling();
510                 }
511 
512                 if (!next && !parentNode && m_shadowDepth > 0) {
513                     // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next
514                     // sibling shadow root, if any.
515                     ShadowRoot* shadowRoot = toShadowRoot(m_node);
516                     if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
517                         ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot();
518                         if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
519                             m_fullyClippedStack.pop();
520                             m_node = nextShadowRoot;
521                             m_iterationProgress = HandledNone;
522                             // m_shadowDepth is unchanged since we exit from a shadow root and enter another.
523                             pushFullyClippedState(m_fullyClippedStack, m_node);
524                         } else {
525                             // We are the last shadow root; exit from here and go back to where we were.
526                             m_node = shadowRoot->host();
527                             m_iterationProgress = HandledAuthorShadowRoots;
528                             --m_shadowDepth;
529                             m_fullyClippedStack.pop();
530                         }
531                     } else {
532                         // If we are in a user-agent shadow root, then go back to the host.
533                         ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
534                         m_node = shadowRoot->host();
535                         m_iterationProgress = HandledUserAgentShadowRoot;
536                         --m_shadowDepth;
537                         m_fullyClippedStack.pop();
538                     }
539                     m_handledFirstLetter = false;
540                     m_firstLetterText = nullptr;
541                     continue;
542                 }
543             }
544             m_fullyClippedStack.pop();
545         }
546 
547         // set the new current node
548         m_node = next;
549         if (m_node)
550             pushFullyClippedState(m_fullyClippedStack, m_node);
551         m_iterationProgress = HandledNone;
552         m_handledFirstLetter = false;
553         m_firstLetterText = nullptr;
554 
555         // how would this ever be?
556         if (m_positionNode)
557             return;
558     }
559 }
560 
characterAt(unsigned index) const561 UChar TextIterator::characterAt(unsigned index) const
562 {
563     ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length()));
564     if (!(index < static_cast<unsigned>(length())))
565         return 0;
566 
567     if (m_singleCharacterBuffer) {
568         ASSERT(!index);
569         ASSERT(length() == 1);
570         return m_singleCharacterBuffer;
571     }
572 
573     return string()[positionStartOffset() + index];
574 }
575 
substring(unsigned position,unsigned length) const576 String TextIterator::substring(unsigned position, unsigned length) const
577 {
578     ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length()));
579     ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length()));
580     if (!length)
581         return emptyString();
582     if (m_singleCharacterBuffer) {
583         ASSERT(!position);
584         ASSERT(length == 1);
585         return String(&m_singleCharacterBuffer, 1);
586     }
587     return string().substring(positionStartOffset() + position, length);
588 }
589 
appendTextToStringBuilder(StringBuilder & builder,unsigned position,unsigned maxLength) const590 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const
591 {
592     unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength);
593     if (!lengthToAppend)
594         return;
595     if (m_singleCharacterBuffer) {
596         ASSERT(!position);
597         builder.append(m_singleCharacterBuffer);
598     } else {
599         builder.append(string(), positionStartOffset() + position, lengthToAppend);
600     }
601 }
602 
handleTextNode()603 bool TextIterator::handleTextNode()
604 {
605     if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
606         return false;
607 
608     Text* textNode = toText(m_node);
609     RenderText* renderer = textNode->renderer();
610 
611     m_lastTextNode = textNode;
612     String str = renderer->text();
613 
614     // handle pre-formatted text
615     if (!renderer->style()->collapseWhiteSpace()) {
616         int runStart = m_offset;
617         if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
618             emitCharacter(space, textNode, 0, runStart, runStart);
619             return false;
620         }
621         if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) {
622             handleTextNodeFirstLetter(toRenderTextFragment(renderer));
623             if (m_firstLetterText) {
624                 String firstLetter = m_firstLetterText->text();
625                 emitText(textNode, m_firstLetterText, m_offset, m_offset + firstLetter.length());
626                 m_firstLetterText = nullptr;
627                 m_textBox = 0;
628                 return false;
629             }
630         }
631         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
632             return false;
633         int strLength = str.length();
634         int end = (textNode == m_endContainer) ? m_endOffset : INT_MAX;
635         int runEnd = std::min(strLength, end);
636 
637         if (runStart >= runEnd)
638             return true;
639 
640         emitText(textNode, textNode->renderer(), runStart, runEnd);
641         return true;
642     }
643 
644     if (renderer->firstTextBox())
645         m_textBox = renderer->firstTextBox();
646 
647     bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset;
648     if (shouldHandleFirstLetter)
649         handleTextNodeFirstLetter(toRenderTextFragment(renderer));
650 
651     if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) {
652         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
653             return false;
654         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
655         return true;
656     }
657 
658     if (m_firstLetterText)
659         renderer = m_firstLetterText;
660 
661     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
662     if (renderer->containsReversedText()) {
663         m_sortedTextBoxes.clear();
664         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
665             m_sortedTextBoxes.append(textBox);
666         }
667         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
668         m_sortedTextBoxesPosition = 0;
669         m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0];
670     }
671 
672     handleTextBox();
673     return true;
674 }
675 
handleTextBox()676 void TextIterator::handleTextBox()
677 {
678     RenderText* renderer = m_firstLetterText ? m_firstLetterText.get() : toRenderText(m_node->renderer());
679     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
680         m_textBox = 0;
681         return;
682     }
683     String str = renderer->text();
684     unsigned start = m_offset;
685     unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX;
686     while (m_textBox) {
687         unsigned textBoxStart = m_textBox->start();
688         unsigned runStart = std::max(textBoxStart, start);
689 
690         // Check for collapsed space at the start of this run.
691         InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
692         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
693             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
694         if (needSpace && !renderer->style()->isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) {
695             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
696                 unsigned spaceRunStart = runStart - 1;
697                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
698                     --spaceRunStart;
699                 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
700             } else {
701                 emitCharacter(space, m_node, 0, runStart, runStart);
702             }
703             return;
704         }
705         unsigned textBoxEnd = textBoxStart + m_textBox->len();
706         unsigned runEnd = std::min(textBoxEnd, end);
707 
708         // Determine what the next text box will be, but don't advance yet
709         InlineTextBox* nextTextBox = 0;
710         if (renderer->containsReversedText()) {
711             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
712                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
713         } else {
714             nextTextBox = m_textBox->nextTextBox();
715         }
716         ASSERT(!nextTextBox || nextTextBox->renderer() == renderer);
717 
718         if (runStart < runEnd) {
719             // Handle either a single newline character (which becomes a space),
720             // or a run of characters that does not include a newline.
721             // This effectively translates newlines to spaces without copying the text.
722             if (str[runStart] == '\n') {
723                 emitCharacter(space, m_node, 0, runStart, runStart + 1);
724                 m_offset = runStart + 1;
725             } else {
726                 size_t subrunEnd = str.find('\n', runStart);
727                 if (subrunEnd == kNotFound || subrunEnd > runEnd)
728                     subrunEnd = runEnd;
729 
730                 m_offset = subrunEnd;
731                 emitText(m_node, renderer, runStart, subrunEnd);
732             }
733 
734             // If we are doing a subrun that doesn't go to the end of the text box,
735             // come back again to finish handling this text box; don't advance to the next one.
736             if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
737                 return;
738 
739             // Advance and return
740             unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
741             if (nextRunStart > runEnd)
742                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
743             m_textBox = nextTextBox;
744             if (renderer->containsReversedText())
745                 ++m_sortedTextBoxesPosition;
746             return;
747         }
748         // Advance and continue
749         m_textBox = nextTextBox;
750         if (renderer->containsReversedText())
751             ++m_sortedTextBoxesPosition;
752     }
753     if (!m_textBox && m_remainingTextBox) {
754         m_textBox = m_remainingTextBox;
755         m_remainingTextBox = 0;
756         m_firstLetterText = nullptr;
757         m_offset = 0;
758         handleTextBox();
759     }
760 }
761 
firstRenderTextInFirstLetter(RenderBoxModelObject * firstLetter)762 static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter)
763 {
764     if (!firstLetter)
765         return 0;
766 
767     // FIXME: Should this check descendent objects?
768     for (RenderObject* current = firstLetter->slowFirstChild(); current; current = current->nextSibling()) {
769         if (current->isText())
770             return toRenderText(current);
771     }
772     return 0;
773 }
774 
handleTextNodeFirstLetter(RenderTextFragment * renderer)775 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
776 {
777     if (renderer->firstLetter()) {
778         RenderBoxModelObject* r = renderer->firstLetter();
779         if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
780             return;
781         if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) {
782             m_handledFirstLetter = true;
783             m_remainingTextBox = m_textBox;
784             m_textBox = firstLetter->firstTextBox();
785             m_sortedTextBoxes.clear();
786             m_firstLetterText = firstLetter;
787         }
788     }
789     m_handledFirstLetter = true;
790 }
791 
handleReplacedElement()792 bool TextIterator::handleReplacedElement()
793 {
794     if (m_fullyClippedStack.top())
795         return false;
796 
797     RenderObject* renderer = m_node->renderer();
798     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
799         return false;
800 
801     if (m_emitsObjectReplacementCharacter) {
802         emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
803         return true;
804     }
805 
806     if (m_lastTextNodeEndedWithCollapsedSpace) {
807         emitCharacter(space, m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
808         return false;
809     }
810 
811     if (m_entersTextControls && renderer->isTextControl()) {
812         // The shadow tree should be already visited.
813         return true;
814     }
815 
816     m_hasEmitted = true;
817 
818     if (m_emitsCharactersBetweenAllVisiblePositions) {
819         // We want replaced elements to behave like punctuation for boundary
820         // finding, and to simply take up space for the selection preservation
821         // code in moveParagraphs, so we use a comma.
822         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
823         return true;
824     }
825 
826     m_positionNode = m_node->parentNode();
827     m_positionOffsetBaseNode = m_node;
828     m_positionStartOffset = 0;
829     m_positionEndOffset = 1;
830     m_singleCharacterBuffer = 0;
831 
832     if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) {
833         m_text = toRenderImage(renderer)->altText();
834         if (!m_text.isEmpty()) {
835             m_textLength = m_text.length();
836             m_lastCharacter = m_text[m_textLength - 1];
837             return true;
838         }
839     }
840 
841     m_textLength = 0;
842     m_lastCharacter = 0;
843 
844     return true;
845 }
846 
hasVisibleTextNode(RenderText * renderer)847 bool TextIterator::hasVisibleTextNode(RenderText* renderer)
848 {
849     if (renderer->style()->visibility() == VISIBLE)
850         return true;
851     if (renderer->isTextFragment()) {
852         RenderTextFragment* fragment = toRenderTextFragment(renderer);
853         if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
854             return true;
855     }
856     return false;
857 }
858 
shouldEmitTabBeforeNode(Node * node)859 static bool shouldEmitTabBeforeNode(Node* node)
860 {
861     RenderObject* r = node->renderer();
862 
863     // Table cells are delimited by tabs.
864     if (!r || !isTableCell(node))
865         return false;
866 
867     // Want a tab before every cell other than the first one
868     RenderTableCell* rc = toRenderTableCell(r);
869     RenderTable* t = rc->table();
870     return t && (t->cellBefore(rc) || t->cellAbove(rc));
871 }
872 
shouldEmitNewlineForNode(Node * node,bool emitsOriginalText)873 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
874 {
875     RenderObject* renderer = node->renderer();
876 
877     if (renderer ? !renderer->isBR() : !isHTMLBRElement(node))
878         return false;
879     return emitsOriginalText || !(node->isInShadowTree() && isHTMLInputElement(*node->shadowHost()));
880 }
881 
shouldEmitNewlinesBeforeAndAfterNode(Node & node)882 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node)
883 {
884     // Block flow (versus inline flow) is represented by having
885     // a newline both before and after the element.
886     RenderObject* r = node.renderer();
887     if (!r) {
888         return (node.hasTagName(blockquoteTag)
889             || node.hasTagName(ddTag)
890             || node.hasTagName(divTag)
891             || node.hasTagName(dlTag)
892             || node.hasTagName(dtTag)
893             || node.hasTagName(h1Tag)
894             || node.hasTagName(h2Tag)
895             || node.hasTagName(h3Tag)
896             || node.hasTagName(h4Tag)
897             || node.hasTagName(h5Tag)
898             || node.hasTagName(h6Tag)
899             || node.hasTagName(hrTag)
900             || node.hasTagName(liTag)
901             || node.hasTagName(listingTag)
902             || node.hasTagName(olTag)
903             || node.hasTagName(pTag)
904             || node.hasTagName(preTag)
905             || node.hasTagName(trTag)
906             || node.hasTagName(ulTag));
907     }
908 
909     // Need to make an exception for option and optgroup, because we want to
910     // keep the legacy behavior before we added renderers to them.
911     if (isHTMLOptionElement(node) || isHTMLOptGroupElement(node))
912         return false;
913 
914     // Need to make an exception for table cells, because they are blocks, but we
915     // want them tab-delimited rather than having newlines before and after.
916     if (isTableCell(&node))
917         return false;
918 
919     // Need to make an exception for table row elements, because they are neither
920     // "inline" or "RenderBlock", but we want newlines for them.
921     if (r->isTableRow()) {
922         RenderTable* t = toRenderTableRow(r)->table();
923         if (t && !t->isInline())
924             return true;
925     }
926 
927     return !r->isInline() && r->isRenderBlock()
928         && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText();
929 }
930 
shouldEmitNewlineAfterNode(Node & node)931 static bool shouldEmitNewlineAfterNode(Node& node)
932 {
933     // FIXME: It should be better but slower to create a VisiblePosition here.
934     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
935         return false;
936     // Check if this is the very last renderer in the document.
937     // If so, then we should not emit a newline.
938     Node* next = &node;
939     do {
940         next = NodeTraversal::nextSkippingChildren(*next);
941         if (next && next->renderer())
942             return true;
943     } while (next);
944     return false;
945 }
946 
shouldEmitNewlineBeforeNode(Node & node)947 static bool shouldEmitNewlineBeforeNode(Node& node)
948 {
949     return shouldEmitNewlinesBeforeAndAfterNode(node);
950 }
951 
shouldEmitExtraNewlineForNode(Node * node)952 static bool shouldEmitExtraNewlineForNode(Node* node)
953 {
954     // When there is a significant collapsed bottom margin, emit an extra
955     // newline for a more realistic result. We end up getting the right
956     // result even without margin collapsing. For example: <div><p>text</p></div>
957     // will work right even if both the <div> and the <p> have bottom margins.
958     RenderObject* r = node->renderer();
959     if (!r || !r->isBox())
960         return false;
961 
962     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
963     // not to do this at all
964     if (node->hasTagName(h1Tag)
965         || node->hasTagName(h2Tag)
966         || node->hasTagName(h3Tag)
967         || node->hasTagName(h4Tag)
968         || node->hasTagName(h5Tag)
969         || node->hasTagName(h6Tag)
970         || node->hasTagName(pTag)) {
971         RenderStyle* style = r->style();
972         if (style) {
973             int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
974             int fontSize = style->fontDescription().computedPixelSize();
975             if (bottomMargin * 2 >= fontSize)
976                 return true;
977         }
978     }
979 
980     return false;
981 }
982 
collapsedSpaceLength(RenderText * renderer,int textEnd)983 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
984 {
985     const String& text = renderer->text();
986     int length = text.length();
987     for (int i = textEnd; i < length; ++i) {
988         if (!renderer->style()->isCollapsibleWhiteSpace(text[i]))
989             return i - textEnd;
990     }
991 
992     return length - textEnd;
993 }
994 
maxOffsetIncludingCollapsedSpaces(Node * node)995 static int maxOffsetIncludingCollapsedSpaces(Node* node)
996 {
997     int offset = caretMaxOffset(node);
998 
999     if (node->renderer() && node->renderer()->isText())
1000         offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
1001 
1002     return offset;
1003 }
1004 
1005 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
shouldRepresentNodeOffsetZero()1006 bool TextIterator::shouldRepresentNodeOffsetZero()
1007 {
1008     if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTableElement(m_node))
1009         return true;
1010 
1011     // Leave element positioned flush with start of a paragraph
1012     // (e.g. do not insert tab before a table cell at the start of a paragraph)
1013     if (m_lastCharacter == '\n')
1014         return false;
1015 
1016     // Otherwise, show the position if we have emitted any characters
1017     if (m_hasEmitted)
1018         return true;
1019 
1020     // We've not emitted anything yet. Generally, there is no need for any positioning then.
1021     // The only exception is when the element is visually not in the same line as
1022     // the start of the range (e.g. the range starts at the end of the previous paragraph).
1023     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
1024     // make quicker checks to possibly avoid that. Another check that we could make is
1025     // is whether the inline vs block flow changed since the previous visible element.
1026     // I think we're already in a special enough case that that won't be needed, tho.
1027 
1028     // No character needed if this is the first node in the range.
1029     if (m_node == m_startContainer)
1030         return false;
1031 
1032     // If we are outside the start container's subtree, assume we need to emit.
1033     // FIXME: m_startContainer could be an inline block
1034     if (!m_node->isDescendantOf(m_startContainer))
1035         return true;
1036 
1037     // If we started as m_startContainer offset 0 and the current node is a descendant of
1038     // the start container, we already had enough context to correctly decide whether to
1039     // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
1040     // so don't second guess that now.
1041     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
1042     // immaterial since we likely would have already emitted something by now.
1043     if (!m_startOffset)
1044         return false;
1045 
1046     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
1047     // Additionally, if the range we are iterating over contains huge sections of unrendered content,
1048     // we would create VisiblePositions on every call to this function without this check.
1049     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE
1050         || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !isHTMLBodyElement(*m_node)))
1051         return false;
1052 
1053     // The startPos.isNotNull() check is needed because the start could be before the body,
1054     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
1055     // The currPos.isNotNull() check is needed because positions in non-HTML content
1056     // (like SVG) do not have visible positions, and we don't want to emit for them either.
1057     VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
1058     VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
1059     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
1060 }
1061 
shouldEmitSpaceBeforeAndAfterNode(Node * node)1062 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
1063 {
1064     return isRenderedTableElement(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
1065 }
1066 
representNodeOffsetZero()1067 void TextIterator::representNodeOffsetZero()
1068 {
1069     // Emit a character to show the positioning of m_node.
1070 
1071     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
1072     // create VisiblePositions, which is expensive. So, we perform the inexpensive checks
1073     // on m_node to see if it necessitates emitting a character first and will early return
1074     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
1075     if (shouldEmitTabBeforeNode(m_node)) {
1076         if (shouldRepresentNodeOffsetZero())
1077             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
1078     } else if (shouldEmitNewlineBeforeNode(*m_node)) {
1079         if (shouldRepresentNodeOffsetZero())
1080             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
1081     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
1082         if (shouldRepresentNodeOffsetZero())
1083             emitCharacter(space, m_node->parentNode(), m_node, 0, 0);
1084     }
1085 }
1086 
handleNonTextNode()1087 bool TextIterator::handleNonTextNode()
1088 {
1089     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText))
1090         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
1091     else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
1092         emitCharacter(space, m_node->parentNode(), m_node, 0, 1);
1093     else
1094         representNodeOffsetZero();
1095 
1096     return true;
1097 }
1098 
flushPositionOffsets() const1099 void TextIterator::flushPositionOffsets() const
1100 {
1101     if (!m_positionOffsetBaseNode)
1102         return;
1103     int index = m_positionOffsetBaseNode->nodeIndex();
1104     m_positionStartOffset += index;
1105     m_positionEndOffset += index;
1106     m_positionOffsetBaseNode = nullptr;
1107 }
1108 
exitNode()1109 void TextIterator::exitNode()
1110 {
1111     // prevent emitting a newline when exiting a collapsed block at beginning of the range
1112     // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
1113     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
1114     // therefore look like a blank line.
1115     if (!m_hasEmitted)
1116         return;
1117 
1118     // Emit with a position *inside* m_node, after m_node's contents, in
1119     // case it is a block, because the run should start where the
1120     // emitted character is positioned visually.
1121     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
1122     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
1123     // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use
1124     // TextIterator in _web_attributedStringFromRange.
1125     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
1126     if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) {
1127         // use extra newline to represent margin bottom, as needed
1128         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
1129 
1130         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
1131         // contain a VisiblePosition when doing selection preservation.
1132         if (m_lastCharacter != '\n') {
1133             // insert a newline with a position following this block's contents.
1134             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
1135             // remember whether to later add a newline for the current node
1136             ASSERT(!m_needsAnotherNewline);
1137             m_needsAnotherNewline = addNewline;
1138         } else if (addNewline) {
1139             // insert a newline with a position following this block's contents.
1140             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
1141         }
1142     }
1143 
1144     // If nothing was emitted, see if we need to emit a space.
1145     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
1146         emitCharacter(space, baseNode->parentNode(), baseNode, 1, 1);
1147 }
1148 
emitCharacter(UChar c,Node * textNode,Node * offsetBaseNode,int textStartOffset,int textEndOffset)1149 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
1150 {
1151     m_hasEmitted = true;
1152 
1153     // remember information with which to construct the TextIterator::range()
1154     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
1155     m_positionNode = textNode;
1156     m_positionOffsetBaseNode = offsetBaseNode;
1157     m_positionStartOffset = textStartOffset;
1158     m_positionEndOffset = textEndOffset;
1159 
1160     // remember information with which to construct the TextIterator::characters() and length()
1161     m_singleCharacterBuffer = c;
1162     ASSERT(m_singleCharacterBuffer);
1163     m_textLength = 1;
1164 
1165     // remember some iteration state
1166     m_lastTextNodeEndedWithCollapsedSpace = false;
1167     m_lastCharacter = c;
1168 }
1169 
emitText(Node * textNode,RenderText * renderer,int textStartOffset,int textEndOffset)1170 void TextIterator::emitText(Node* textNode, RenderText* renderer, int textStartOffset, int textEndOffset)
1171 {
1172     m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text();
1173     ASSERT(!m_text.isEmpty());
1174     ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length()));
1175     ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length()));
1176     ASSERT(textStartOffset <= textEndOffset);
1177 
1178     m_positionNode = textNode;
1179     m_positionOffsetBaseNode = nullptr;
1180     m_positionStartOffset = textStartOffset;
1181     m_positionEndOffset = textEndOffset;
1182     m_singleCharacterBuffer = 0;
1183     m_textLength = textEndOffset - textStartOffset;
1184     m_lastCharacter = m_text[textEndOffset - 1];
1185 
1186     m_lastTextNodeEndedWithCollapsedSpace = false;
1187     m_hasEmitted = true;
1188 }
1189 
createRange() const1190 PassRefPtrWillBeRawPtr<Range> TextIterator::createRange() const
1191 {
1192     // use the current run information, if we have it
1193     if (m_positionNode) {
1194         flushPositionOffsets();
1195         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1196     }
1197 
1198     // otherwise, return the end of the overall range we were given
1199     if (m_endContainer)
1200         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
1201 
1202     return nullptr;
1203 }
1204 
ownerDocument() const1205 Document* TextIterator::ownerDocument() const
1206 {
1207     if (m_positionNode)
1208         return &m_positionNode->document();
1209     if (m_endContainer)
1210         return &m_endContainer->document();
1211     return 0;
1212 }
1213 
node() const1214 Node* TextIterator::node() const
1215 {
1216     if (m_positionNode || m_endContainer) {
1217         Node* node = startContainer();
1218         if (node->offsetInCharacters())
1219             return node;
1220         return NodeTraversal::childAt(*node, startOffset());
1221     }
1222     return 0;
1223 }
1224 
startOffset() const1225 int TextIterator::startOffset() const
1226 {
1227     if (m_positionNode) {
1228         flushPositionOffsets();
1229         return m_positionStartOffset;
1230     }
1231     ASSERT(m_endContainer);
1232     return m_endOffset;
1233 }
1234 
endOffset() const1235 int TextIterator::endOffset() const
1236 {
1237     if (m_positionNode) {
1238         flushPositionOffsets();
1239         return m_positionEndOffset;
1240     }
1241     ASSERT(m_endContainer);
1242     return m_endOffset;
1243 }
1244 
startContainer() const1245 Node* TextIterator::startContainer() const
1246 {
1247     if (m_positionNode) {
1248         return m_positionNode;
1249     }
1250     ASSERT(m_endContainer);
1251     return m_endContainer;
1252 }
1253 
endContainer() const1254 Node* TextIterator::endContainer() const
1255 {
1256     return startContainer();
1257 }
1258 
startPosition() const1259 Position TextIterator::startPosition() const
1260 {
1261     return createLegacyEditingPosition(startContainer(), startOffset());
1262 }
1263 
endPosition() const1264 Position TextIterator::endPosition() const
1265 {
1266     return createLegacyEditingPosition(endContainer(), endOffset());
1267 }
1268 
1269 // --------
1270 
SimplifiedBackwardsTextIterator(const Range * r,TextIteratorBehaviorFlags behavior)1271 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior)
1272     : m_node(nullptr)
1273     , m_offset(0)
1274     , m_handledNode(false)
1275     , m_handledChildren(false)
1276     , m_startNode(nullptr)
1277     , m_startOffset(0)
1278     , m_endNode(nullptr)
1279     , m_endOffset(0)
1280     , m_positionNode(nullptr)
1281     , m_positionStartOffset(0)
1282     , m_positionEndOffset(0)
1283     , m_textOffset(0)
1284     , m_textLength(0)
1285     , m_lastTextNode(nullptr)
1286     , m_lastCharacter(0)
1287     , m_singleCharacterBuffer(0)
1288     , m_havePassedStartNode(false)
1289     , m_shouldHandleFirstLetter(false)
1290     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
1291     , m_shouldStop(false)
1292     , m_emitsOriginalText(false)
1293 {
1294     ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
1295 
1296     if (!r)
1297         return;
1298 
1299     Node* startNode = r->startContainer();
1300     if (!startNode)
1301         return;
1302     Node* endNode = r->endContainer();
1303     int startOffset = r->startOffset();
1304     int endOffset = r->endOffset();
1305 
1306     init(startNode, endNode, startOffset, endOffset);
1307 }
1308 
SimplifiedBackwardsTextIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)1309 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
1310     : m_node(nullptr)
1311     , m_offset(0)
1312     , m_handledNode(false)
1313     , m_handledChildren(false)
1314     , m_startNode(nullptr)
1315     , m_startOffset(0)
1316     , m_endNode(nullptr)
1317     , m_endOffset(0)
1318     , m_positionNode(nullptr)
1319     , m_positionStartOffset(0)
1320     , m_positionEndOffset(0)
1321     , m_textOffset(0)
1322     , m_textLength(0)
1323     , m_lastTextNode(nullptr)
1324     , m_lastCharacter(0)
1325     , m_singleCharacterBuffer(0)
1326     , m_havePassedStartNode(false)
1327     , m_shouldHandleFirstLetter(false)
1328     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
1329     , m_shouldStop(false)
1330     , m_emitsOriginalText(false)
1331 {
1332     ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
1333 
1334     Node* startNode = start.deprecatedNode();
1335     if (!startNode)
1336         return;
1337     Node* endNode = end.deprecatedNode();
1338     int startOffset = start.deprecatedEditingOffset();
1339     int endOffset = end.deprecatedEditingOffset();
1340 
1341     init(startNode, endNode, startOffset, endOffset);
1342 }
1343 
init(Node * startNode,Node * endNode,int startOffset,int endOffset)1344 void SimplifiedBackwardsTextIterator::init(Node* startNode, Node* endNode, int startOffset, int endOffset)
1345 {
1346     if (!startNode->offsetInCharacters() && startOffset >= 0) {
1347         // NodeTraversal::childAt() will return 0 if the offset is out of range. We rely on this behavior
1348         // instead of calling countChildren() to avoid traversing the children twice.
1349         if (Node* childAtOffset = NodeTraversal::childAt(*startNode, startOffset)) {
1350             startNode = childAtOffset;
1351             startOffset = 0;
1352         }
1353     }
1354     if (!endNode->offsetInCharacters() && endOffset > 0) {
1355         // NodeTraversal::childAt() will return 0 if the offset is out of range. We rely on this behavior
1356         // instead of calling countChildren() to avoid traversing the children twice.
1357         if (Node* childAtOffset = NodeTraversal::childAt(*endNode, endOffset - 1)) {
1358             endNode = childAtOffset;
1359             endOffset = lastOffsetInNode(endNode);
1360         }
1361     }
1362 
1363     m_node = endNode;
1364     setUpFullyClippedStack(m_fullyClippedStack, m_node);
1365     m_offset = endOffset;
1366     m_handledNode = false;
1367     m_handledChildren = !endOffset;
1368 
1369     m_startNode = startNode;
1370     m_startOffset = startOffset;
1371     m_endNode = endNode;
1372     m_endOffset = endOffset;
1373 
1374 #if ENABLE(ASSERT)
1375     // Need this just because of the assert.
1376     m_positionNode = endNode;
1377 #endif
1378 
1379     m_lastTextNode = nullptr;
1380     m_lastCharacter = '\n';
1381 
1382     m_havePassedStartNode = false;
1383 
1384     advance();
1385 }
1386 
advance()1387 void SimplifiedBackwardsTextIterator::advance()
1388 {
1389     ASSERT(m_positionNode);
1390 
1391     if (m_shouldStop)
1392         return;
1393 
1394     if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) {
1395         m_shouldStop = true;
1396         return;
1397     }
1398 
1399     m_positionNode = nullptr;
1400     m_textLength = 0;
1401 
1402     while (m_node && !m_havePassedStartNode) {
1403         // Don't handle node if we start iterating at [node, 0].
1404         if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) {
1405             RenderObject* renderer = m_node->renderer();
1406             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
1407                 // FIXME: What about CDATA_SECTION_NODE?
1408                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1409                     m_handledNode = handleTextNode();
1410             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
1411                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1412                     m_handledNode = handleReplacedElement();
1413             } else {
1414                 m_handledNode = handleNonTextNode();
1415             }
1416             if (m_positionNode)
1417                 return;
1418         }
1419 
1420         if (!m_handledChildren && m_node->hasChildren()) {
1421             m_node = m_node->lastChild();
1422             pushFullyClippedState(m_fullyClippedStack, m_node);
1423         } else {
1424             // Exit empty containers as we pass over them or containers
1425             // where [container, 0] is where we started iterating.
1426             if (!m_handledNode
1427                 && canHaveChildrenForEditing(m_node)
1428                 && m_node->parentNode()
1429                 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
1430                 exitNode();
1431                 if (m_positionNode) {
1432                     m_handledNode = true;
1433                     m_handledChildren = true;
1434                     return;
1435                 }
1436             }
1437 
1438             // Exit all other containers.
1439             while (!m_node->previousSibling()) {
1440                 if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
1441                     break;
1442                 m_fullyClippedStack.pop();
1443                 exitNode();
1444                 if (m_positionNode) {
1445                     m_handledNode = true;
1446                     m_handledChildren = true;
1447                     return;
1448                 }
1449             }
1450 
1451             m_fullyClippedStack.pop();
1452             if (advanceRespectingRange(m_node->previousSibling()))
1453                 pushFullyClippedState(m_fullyClippedStack, m_node);
1454             else
1455                 m_node = nullptr;
1456         }
1457 
1458         // For the purpose of word boundary detection,
1459         // we should iterate all visible text and trailing (collapsed) whitespaces.
1460         m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
1461         m_handledNode = false;
1462         m_handledChildren = false;
1463 
1464         if (m_positionNode)
1465             return;
1466     }
1467 }
1468 
handleTextNode()1469 bool SimplifiedBackwardsTextIterator::handleTextNode()
1470 {
1471     m_lastTextNode = toText(m_node);
1472 
1473     int startOffset;
1474     int offsetInNode;
1475     RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
1476     if (!renderer)
1477         return true;
1478 
1479     String text = renderer->text();
1480     if (!renderer->firstTextBox() && text.length() > 0)
1481         return true;
1482 
1483     m_positionEndOffset = m_offset;
1484     m_offset = startOffset + offsetInNode;
1485     m_positionNode = m_node;
1486     m_positionStartOffset = m_offset;
1487 
1488     ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length()));
1489     ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
1490     ASSERT(m_positionStartOffset <= m_positionEndOffset);
1491 
1492     m_textLength = m_positionEndOffset - m_positionStartOffset;
1493     m_textOffset = m_positionStartOffset - offsetInNode;
1494     m_textContainer = text;
1495     m_singleCharacterBuffer = 0;
1496     RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length());
1497 
1498     m_lastCharacter = text[m_positionEndOffset - 1];
1499 
1500     return !m_shouldHandleFirstLetter;
1501 }
1502 
handleFirstLetter(int & startOffset,int & offsetInNode)1503 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
1504 {
1505     RenderText* renderer = toRenderText(m_node->renderer());
1506     startOffset = (m_node == m_startNode) ? m_startOffset : 0;
1507 
1508     if (!renderer->isTextFragment()) {
1509         offsetInNode = 0;
1510         return renderer;
1511     }
1512 
1513     RenderTextFragment* fragment = toRenderTextFragment(renderer);
1514     int offsetAfterFirstLetter = fragment->start();
1515     if (startOffset >= offsetAfterFirstLetter) {
1516         ASSERT(!m_shouldHandleFirstLetter);
1517         offsetInNode = offsetAfterFirstLetter;
1518         return renderer;
1519     }
1520 
1521     if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) {
1522         m_shouldHandleFirstLetter = true;
1523         offsetInNode = offsetAfterFirstLetter;
1524         return renderer;
1525     }
1526 
1527     m_shouldHandleFirstLetter = false;
1528     offsetInNode = 0;
1529     RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment->firstLetter());
1530 
1531     m_offset = firstLetterRenderer->caretMaxOffset();
1532     m_offset += collapsedSpaceLength(firstLetterRenderer, m_offset);
1533 
1534     return firstLetterRenderer;
1535 }
1536 
handleReplacedElement()1537 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
1538 {
1539     unsigned index = m_node->nodeIndex();
1540     // We want replaced elements to behave like punctuation for boundary
1541     // finding, and to simply take up space for the selection preservation
1542     // code in moveParagraphs, so we use a comma. Unconditionally emit
1543     // here because this iterator is only used for boundary finding.
1544     emitCharacter(',', m_node->parentNode(), index, index + 1);
1545     return true;
1546 }
1547 
handleNonTextNode()1548 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
1549 {
1550     // We can use a linefeed in place of a tab because this simple iterator is only used to
1551     // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
1552     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
1553         unsigned index = m_node->nodeIndex();
1554         // The start of this emitted range is wrong. Ensuring correctness would require
1555         // VisiblePositions and so would be slow. previousBoundary expects this.
1556         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
1557     }
1558     return true;
1559 }
1560 
exitNode()1561 void SimplifiedBackwardsTextIterator::exitNode()
1562 {
1563     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
1564         // The start of this emitted range is wrong. Ensuring correctness would require
1565         // VisiblePositions and so would be slow. previousBoundary expects this.
1566         emitCharacter('\n', m_node, 0, 0);
1567     }
1568 }
1569 
emitCharacter(UChar c,Node * node,int startOffset,int endOffset)1570 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
1571 {
1572     m_singleCharacterBuffer = c;
1573     m_positionNode = node;
1574     m_positionStartOffset = startOffset;
1575     m_positionEndOffset = endOffset;
1576     m_textOffset = 0;
1577     m_textLength = 1;
1578     m_lastCharacter = c;
1579 }
1580 
advanceRespectingRange(Node * next)1581 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
1582 {
1583     if (!next)
1584         return false;
1585     m_havePassedStartNode |= m_node == m_startNode;
1586     if (m_havePassedStartNode)
1587         return false;
1588     m_node = next;
1589     return true;
1590 }
1591 
startContainer() const1592 Node* SimplifiedBackwardsTextIterator::startContainer() const
1593 {
1594     if (m_positionNode)
1595         return m_positionNode;
1596     return m_startNode;
1597 }
1598 
endOffset() const1599 int SimplifiedBackwardsTextIterator::endOffset() const
1600 {
1601     if (m_positionNode)
1602         return m_positionEndOffset;
1603     return m_startOffset;
1604 }
1605 
startPosition() const1606 Position SimplifiedBackwardsTextIterator::startPosition() const
1607 {
1608     if (m_positionNode)
1609         return createLegacyEditingPosition(m_positionNode, m_positionStartOffset);
1610     return createLegacyEditingPosition(m_startNode, m_startOffset);
1611 }
1612 
endPosition() const1613 Position SimplifiedBackwardsTextIterator::endPosition() const
1614 {
1615     if (m_positionNode)
1616         return createLegacyEditingPosition(m_positionNode, m_positionEndOffset);
1617     return createLegacyEditingPosition(m_startNode, m_startOffset);
1618 }
1619 
1620 // --------
1621 
CharacterIterator(const Range * range,TextIteratorBehaviorFlags behavior)1622 CharacterIterator::CharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
1623     : m_offset(0)
1624     , m_runOffset(0)
1625     , m_atBreak(true)
1626     , m_textIterator(range, behavior)
1627 {
1628     initialize();
1629 }
1630 
CharacterIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)1631 CharacterIterator::CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
1632     : m_offset(0)
1633     , m_runOffset(0)
1634     , m_atBreak(true)
1635     , m_textIterator(start, end, behavior)
1636 {
1637     initialize();
1638 }
1639 
initialize()1640 void CharacterIterator::initialize()
1641 {
1642     while (!atEnd() && !m_textIterator.length())
1643         m_textIterator.advance();
1644 }
1645 
createRange() const1646 PassRefPtrWillBeRawPtr<Range> CharacterIterator::createRange() const
1647 {
1648     RefPtrWillBeRawPtr<Range> r = m_textIterator.createRange();
1649     if (!m_textIterator.atEnd()) {
1650         if (m_textIterator.length() <= 1) {
1651             ASSERT(!m_runOffset);
1652         } else {
1653             Node* n = r->startContainer();
1654             ASSERT(n == r->endContainer());
1655             int offset = r->startOffset() + m_runOffset;
1656             r->setStart(n, offset, ASSERT_NO_EXCEPTION);
1657             r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION);
1658         }
1659     }
1660     return r.release();
1661 }
1662 
ownerDocument() const1663 Document* CharacterIterator::ownerDocument() const
1664 {
1665     return m_textIterator.ownerDocument();
1666 }
1667 
startContainer() const1668 Node* CharacterIterator::startContainer() const
1669 {
1670     return m_textIterator.startContainer();
1671 }
1672 
endContainer() const1673 Node* CharacterIterator::endContainer() const
1674 {
1675     return m_textIterator.endContainer();
1676 }
1677 
startOffset() const1678 int CharacterIterator::startOffset() const
1679 {
1680     if (!m_textIterator.atEnd()) {
1681         if (m_textIterator.length() > 1)
1682             return m_textIterator.startOffset() + m_runOffset;
1683         ASSERT(!m_runOffset);
1684     }
1685     return m_textIterator.startOffset();
1686 }
1687 
endOffset() const1688 int CharacterIterator::endOffset() const
1689 {
1690     if (!m_textIterator.atEnd()) {
1691         if (m_textIterator.length() > 1)
1692             return m_textIterator.startOffset() + m_runOffset + 1;
1693         ASSERT(!m_runOffset);
1694     }
1695     return m_textIterator.endOffset();
1696 }
1697 
startPosition() const1698 Position CharacterIterator::startPosition() const
1699 {
1700     if (!m_textIterator.atEnd()) {
1701         if (m_textIterator.length() > 1) {
1702             Node* n = m_textIterator.startContainer();
1703             int offset = m_textIterator.startOffset() + m_runOffset;
1704             return createLegacyEditingPosition(n, offset);
1705         }
1706         ASSERT(!m_runOffset);
1707     }
1708     return m_textIterator.startPosition();
1709 }
1710 
endPosition() const1711 Position CharacterIterator::endPosition() const
1712 {
1713     if (!m_textIterator.atEnd()) {
1714         if (m_textIterator.length() > 1) {
1715             Node* n = m_textIterator.startContainer();
1716             int offset = m_textIterator.startOffset() + m_runOffset;
1717             return createLegacyEditingPosition(n, offset + 1);
1718         }
1719         ASSERT(!m_runOffset);
1720     }
1721     return m_textIterator.endPosition();
1722 }
1723 
advance(int count)1724 void CharacterIterator::advance(int count)
1725 {
1726     if (count <= 0) {
1727         ASSERT(!count);
1728         return;
1729     }
1730 
1731     m_atBreak = false;
1732 
1733     // easy if there is enough left in the current m_textIterator run
1734     int remaining = m_textIterator.length() - m_runOffset;
1735     if (count < remaining) {
1736         m_runOffset += count;
1737         m_offset += count;
1738         return;
1739     }
1740 
1741     // exhaust the current m_textIterator run
1742     count -= remaining;
1743     m_offset += remaining;
1744 
1745     // move to a subsequent m_textIterator run
1746     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1747         int runLength = m_textIterator.length();
1748         if (!runLength) {
1749             m_atBreak = true;
1750         } else {
1751             // see whether this is m_textIterator to use
1752             if (count < runLength) {
1753                 m_runOffset = count;
1754                 m_offset += count;
1755                 return;
1756             }
1757 
1758             // exhaust this m_textIterator run
1759             count -= runLength;
1760             m_offset += runLength;
1761         }
1762     }
1763 
1764     // ran to the end of the m_textIterator... no more runs left
1765     m_atBreak = true;
1766     m_runOffset = 0;
1767 }
1768 
calculateCharacterSubrange(CharacterIterator & it,int offset,int length,Position & startPosition,Position & endPosition)1769 static void calculateCharacterSubrange(CharacterIterator& it, int offset, int length, Position& startPosition, Position& endPosition)
1770 {
1771     it.advance(offset);
1772     startPosition = it.startPosition();
1773 
1774     if (length > 1)
1775         it.advance(length - 1);
1776     endPosition = it.endPosition();
1777 }
1778 
BackwardsCharacterIterator(const Range * range,TextIteratorBehaviorFlags behavior)1779 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
1780     : m_offset(0)
1781     , m_runOffset(0)
1782     , m_atBreak(true)
1783     , m_textIterator(range, behavior)
1784 {
1785     while (!atEnd() && !m_textIterator.length())
1786         m_textIterator.advance();
1787 }
1788 
BackwardsCharacterIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)1789 BackwardsCharacterIterator::BackwardsCharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
1790     : m_offset(0)
1791     , m_runOffset(0)
1792     , m_atBreak(true)
1793     , m_textIterator(start, end, behavior)
1794 {
1795     while (!atEnd() && !m_textIterator.length())
1796         m_textIterator.advance();
1797 }
1798 
endPosition() const1799 Position BackwardsCharacterIterator::endPosition() const
1800 {
1801     if (!m_textIterator.atEnd()) {
1802         if (m_textIterator.length() > 1) {
1803             Node* n = m_textIterator.startContainer();
1804             return createLegacyEditingPosition(n, m_textIterator.endOffset() - m_runOffset);
1805         }
1806         ASSERT(!m_runOffset);
1807     }
1808     return m_textIterator.endPosition();
1809 }
1810 
advance(int count)1811 void BackwardsCharacterIterator::advance(int count)
1812 {
1813     if (count <= 0) {
1814         ASSERT(!count);
1815         return;
1816     }
1817 
1818     m_atBreak = false;
1819 
1820     int remaining = m_textIterator.length() - m_runOffset;
1821     if (count < remaining) {
1822         m_runOffset += count;
1823         m_offset += count;
1824         return;
1825     }
1826 
1827     count -= remaining;
1828     m_offset += remaining;
1829 
1830     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1831         int runLength = m_textIterator.length();
1832         if (!runLength) {
1833             m_atBreak = true;
1834         } else {
1835             if (count < runLength) {
1836                 m_runOffset = count;
1837                 m_offset += count;
1838                 return;
1839             }
1840 
1841             count -= runLength;
1842             m_offset += runLength;
1843         }
1844     }
1845 
1846     m_atBreak = true;
1847     m_runOffset = 0;
1848 }
1849 
1850 // --------
1851 
WordAwareIterator(const Position & start,const Position & end)1852 WordAwareIterator::WordAwareIterator(const Position& start, const Position& end)
1853     : m_didLookAhead(true) // So we consider the first chunk from the text iterator.
1854     , m_textIterator(start, end)
1855 {
1856     advance(); // Get in position over the first chunk of text.
1857 }
1858 
~WordAwareIterator()1859 WordAwareIterator::~WordAwareIterator()
1860 {
1861 }
1862 
1863 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
1864 
advance()1865 void WordAwareIterator::advance()
1866 {
1867     m_buffer.clear();
1868 
1869     // If last time we did a look-ahead, start with that looked-ahead chunk now
1870     if (!m_didLookAhead) {
1871         ASSERT(!m_textIterator.atEnd());
1872         m_textIterator.advance();
1873     }
1874     m_didLookAhead = false;
1875 
1876     // Go to next non-empty chunk.
1877     while (!m_textIterator.atEnd() && !m_textIterator.length())
1878         m_textIterator.advance();
1879 
1880     if (m_textIterator.atEnd())
1881         return;
1882 
1883     while (1) {
1884         // If this chunk ends in whitespace we can just use it as our chunk.
1885         if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1)))
1886             return;
1887 
1888         // If this is the first chunk that failed, save it in m_buffer before look ahead.
1889         if (m_buffer.isEmpty())
1890             m_textIterator.appendTextTo(m_buffer);
1891 
1892         // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
1893         m_textIterator.advance();
1894         if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) {
1895             m_didLookAhead = true;
1896             return;
1897         }
1898 
1899         // Start gobbling chunks until we get to a suitable stopping point
1900         m_textIterator.appendTextTo(m_buffer);
1901     }
1902 }
1903 
length() const1904 int WordAwareIterator::length() const
1905 {
1906     if (!m_buffer.isEmpty())
1907         return m_buffer.size();
1908     return m_textIterator.length();
1909 }
1910 
substring(unsigned position,unsigned length) const1911 String WordAwareIterator::substring(unsigned position, unsigned length) const
1912 {
1913     if (!m_buffer.isEmpty())
1914         return String(m_buffer.data() + position, length);
1915     return m_textIterator.substring(position, length);
1916 }
1917 
characterAt(unsigned index) const1918 UChar WordAwareIterator::characterAt(unsigned index) const
1919 {
1920     if (!m_buffer.isEmpty())
1921         return m_buffer[index];
1922     return m_textIterator.characterAt(index);
1923 }
1924 
1925 // --------
1926 
1927 static const size_t minimumSearchBufferSize = 8192;
1928 
1929 #if ENABLE(ASSERT)
1930 static bool searcherInUse;
1931 #endif
1932 
createSearcher()1933 static UStringSearch* createSearcher()
1934 {
1935     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
1936     // but it doesn't matter exactly what it is, since we don't perform any searches
1937     // without setting both the pattern and the text.
1938     UErrorCode status = U_ZERO_ERROR;
1939     String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
1940     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
1941     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1942     return searcher;
1943 }
1944 
searcher()1945 static UStringSearch* searcher()
1946 {
1947     static UStringSearch* searcher = createSearcher();
1948     return searcher;
1949 }
1950 
lockSearcher()1951 static inline void lockSearcher()
1952 {
1953 #if ENABLE(ASSERT)
1954     ASSERT(!searcherInUse);
1955     searcherInUse = true;
1956 #endif
1957 }
1958 
unlockSearcher()1959 static inline void unlockSearcher()
1960 {
1961 #if ENABLE(ASSERT)
1962     ASSERT(searcherInUse);
1963     searcherInUse = false;
1964 #endif
1965 }
1966 
SearchBuffer(const String & target,FindOptions options)1967 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
1968     : m_options(options)
1969     , m_prefixLength(0)
1970     , m_numberOfCharactersJustAppended(0)
1971     , m_atBreak(true)
1972     , m_needsMoreContext(options & AtWordStarts)
1973     , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
1974 {
1975     ASSERT(!target.isEmpty());
1976     target.appendTo(m_target);
1977 
1978     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
1979     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
1980     // to add tailoring on top of the locale-specific tailoring as of this writing.
1981     foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());
1982 
1983     size_t targetLength = m_target.size();
1984     m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
1985     m_overlap = m_buffer.capacity() / 4;
1986 
1987     if ((m_options & AtWordStarts) && targetLength) {
1988         UChar32 targetFirstCharacter;
1989         U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
1990         // Characters in the separator category never really occur at the beginning of a word,
1991         // so if the target begins with such a character, we just ignore the AtWordStart option.
1992         if (isSeparator(targetFirstCharacter)) {
1993             m_options &= ~AtWordStarts;
1994             m_needsMoreContext = false;
1995         }
1996     }
1997 
1998     // Grab the single global searcher.
1999     // If we ever have a reason to do more than once search buffer at once, we'll have
2000     // to move to multiple searchers.
2001     lockSearcher();
2002 
2003     UStringSearch* searcher = blink::searcher();
2004     UCollator* collator = usearch_getCollator(searcher);
2005 
2006     UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
2007     if (ucol_getStrength(collator) != strength) {
2008         ucol_setStrength(collator, strength);
2009         usearch_reset(searcher);
2010     }
2011 
2012     UErrorCode status = U_ZERO_ERROR;
2013     usearch_setPattern(searcher, m_target.data(), targetLength, &status);
2014     ASSERT(status == U_ZERO_ERROR);
2015 
2016     // The kana workaround requires a normalized copy of the target string.
2017     if (m_targetRequiresKanaWorkaround)
2018         normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
2019 }
2020 
~SearchBuffer()2021 inline SearchBuffer::~SearchBuffer()
2022 {
2023     // Leave the static object pointing to a valid string.
2024     UErrorCode status = U_ZERO_ERROR;
2025     usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status);
2026     ASSERT(status == U_ZERO_ERROR);
2027 
2028     unlockSearcher();
2029 }
2030 
2031 template<typename CharType>
append(const CharType * characters,size_t length)2032 inline void SearchBuffer::append(const CharType* characters, size_t length)
2033 {
2034     ASSERT(length);
2035 
2036     if (m_atBreak) {
2037         m_buffer.shrink(0);
2038         m_prefixLength = 0;
2039         m_atBreak = false;
2040     } else if (m_buffer.size() == m_buffer.capacity()) {
2041         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
2042         m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
2043         m_buffer.shrink(m_overlap);
2044     }
2045 
2046     size_t oldLength = m_buffer.size();
2047     size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
2048     ASSERT(usableLength);
2049     m_buffer.resize(oldLength + usableLength);
2050     UChar* destination = m_buffer.data() + oldLength;
2051     StringImpl::copyChars(destination, characters, usableLength);
2052     foldQuoteMarksAndSoftHyphens(destination, usableLength);
2053     m_numberOfCharactersJustAppended = usableLength;
2054 }
2055 
needsMoreContext() const2056 inline bool SearchBuffer::needsMoreContext() const
2057 {
2058     return m_needsMoreContext;
2059 }
2060 
prependContext(const UChar * characters,size_t length)2061 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
2062 {
2063     ASSERT(m_needsMoreContext);
2064     ASSERT(m_prefixLength == m_buffer.size());
2065 
2066     if (!length)
2067         return;
2068 
2069     m_atBreak = false;
2070 
2071     size_t wordBoundaryContextStart = length;
2072     if (wordBoundaryContextStart) {
2073         U16_BACK_1(characters, 0, wordBoundaryContextStart);
2074         wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
2075     }
2076 
2077     size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
2078     m_buffer.prepend(characters + length - usableLength, usableLength);
2079     m_prefixLength += usableLength;
2080 
2081     if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
2082         m_needsMoreContext = false;
2083 }
2084 
atBreak() const2085 inline bool SearchBuffer::atBreak() const
2086 {
2087     return m_atBreak;
2088 }
2089 
reachedBreak()2090 inline void SearchBuffer::reachedBreak()
2091 {
2092     m_atBreak = true;
2093 }
2094 
isBadMatch(const UChar * match,size_t matchLength) const2095 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
2096 {
2097     // This function implements the kana workaround. If usearch treats
2098     // it as a match, but we do not want to, then it's a "bad match".
2099     if (!m_targetRequiresKanaWorkaround)
2100         return false;
2101 
2102     // Normalize into a match buffer. We reuse a single buffer rather than
2103     // creating a new one each time.
2104     normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);
2105 
2106     return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size());
2107 }
2108 
isWordStartMatch(size_t start,size_t length) const2109 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
2110 {
2111     ASSERT(m_options & AtWordStarts);
2112 
2113     if (!start)
2114         return true;
2115 
2116     int size = m_buffer.size();
2117     int offset = start;
2118     UChar32 firstCharacter;
2119     U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
2120 
2121     if (m_options & TreatMedialCapitalAsWordStart) {
2122         UChar32 previousCharacter;
2123         U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
2124 
2125         if (isSeparator(firstCharacter)) {
2126             // The start of a separator run is a word start (".org" in "webkit.org").
2127             if (!isSeparator(previousCharacter))
2128                 return true;
2129         } else if (isASCIIUpper(firstCharacter)) {
2130             // The start of an uppercase run is a word start ("Kit" in "WebKit").
2131             if (!isASCIIUpper(previousCharacter))
2132                 return true;
2133             // The last character of an uppercase run followed by a non-separator, non-digit
2134             // is a word start ("Request" in "XMLHTTPRequest").
2135             offset = start;
2136             U16_FWD_1(m_buffer.data(), offset, size);
2137             UChar32 nextCharacter = 0;
2138             if (offset < size)
2139                 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
2140             if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
2141                 return true;
2142         } else if (isASCIIDigit(firstCharacter)) {
2143             // The start of a digit run is a word start ("2" in "WebKit2").
2144             if (!isASCIIDigit(previousCharacter))
2145                 return true;
2146         } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
2147             // The start of a non-separator, non-uppercase, non-digit run is a word start,
2148             // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
2149             return true;
2150         }
2151     }
2152 
2153     // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
2154     // a word, so treat the position before any CJK character as a word start.
2155     if (Character::isCJKIdeographOrSymbol(firstCharacter))
2156         return true;
2157 
2158     size_t wordBreakSearchStart = start + length;
2159     while (wordBreakSearchStart > start)
2160         wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
2161     return wordBreakSearchStart == start;
2162 }
2163 
search(size_t & start)2164 inline size_t SearchBuffer::search(size_t& start)
2165 {
2166     size_t size = m_buffer.size();
2167     if (m_atBreak) {
2168         if (!size)
2169             return 0;
2170     } else {
2171         if (size != m_buffer.capacity())
2172             return 0;
2173     }
2174 
2175     UStringSearch* searcher = blink::searcher();
2176 
2177     UErrorCode status = U_ZERO_ERROR;
2178     usearch_setText(searcher, m_buffer.data(), size, &status);
2179     ASSERT(status == U_ZERO_ERROR);
2180 
2181     usearch_setOffset(searcher, m_prefixLength, &status);
2182     ASSERT(status == U_ZERO_ERROR);
2183 
2184     int matchStart = usearch_next(searcher, &status);
2185     ASSERT(status == U_ZERO_ERROR);
2186 
2187 nextMatch:
2188     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
2189         ASSERT(matchStart == USEARCH_DONE);
2190         return 0;
2191     }
2192 
2193     // Matches that start in the overlap area are only tentative.
2194     // The same match may appear later, matching more characters,
2195     // possibly including a combining character that's not yet in the buffer.
2196     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
2197         size_t overlap = m_overlap;
2198         if (m_options & AtWordStarts) {
2199             // Ensure that there is sufficient context before matchStart the next time around for
2200             // determining if it is at a word boundary.
2201             int wordBoundaryContextStart = matchStart;
2202             U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
2203             wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
2204             overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
2205         }
2206         memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
2207         m_prefixLength -= std::min(m_prefixLength, size - overlap);
2208         m_buffer.shrink(overlap);
2209         return 0;
2210     }
2211 
2212     size_t matchedLength = usearch_getMatchedLength(searcher);
2213     ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
2214 
2215     // If this match is "bad", move on to the next match.
2216     if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
2217         matchStart = usearch_next(searcher, &status);
2218         ASSERT(status == U_ZERO_ERROR);
2219         goto nextMatch;
2220     }
2221 
2222     size_t newSize = size - (matchStart + 1);
2223     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
2224     m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
2225     m_buffer.shrink(newSize);
2226 
2227     start = size - matchStart;
2228     return matchedLength;
2229 }
2230 
2231 // --------
2232 
rangeLength(const Range * r,bool forSelectionPreservation)2233 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
2234 {
2235     int length = 0;
2236     TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter;
2237     if (forSelectionPreservation)
2238         behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions;
2239     for (TextIterator it(r, behaviorFlags); !it.atEnd(); it.advance())
2240         length += it.length();
2241 
2242     return length;
2243 }
2244 
rangeLength(const Position & start,const Position & end,bool forSelectionPreservation)2245 int TextIterator::rangeLength(const Position& start, const Position& end, bool forSelectionPreservation)
2246 {
2247     int length = 0;
2248     TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter;
2249     if (forSelectionPreservation)
2250         behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions;
2251     for (TextIterator it(start, end, behaviorFlags); !it.atEnd(); it.advance())
2252         length += it.length();
2253 
2254     return length;
2255 }
2256 
subrange(Range * entireRange,int characterOffset,int characterCount)2257 PassRefPtrWillBeRawPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
2258 {
2259     CharacterIterator entireRangeIterator(entireRange);
2260     Position start;
2261     Position end;
2262     calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end);
2263     return Range::create(entireRange->ownerDocument(), start, end);
2264 }
2265 
subrange(Position & start,Position & end,int characterOffset,int characterCount)2266 void TextIterator::subrange(Position& start, Position& end, int characterOffset, int characterCount)
2267 {
2268     CharacterIterator entireRangeIterator(start, end);
2269     calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end);
2270 }
2271 
2272 // --------
2273 
createPlainText(TextIterator & it)2274 static String createPlainText(TextIterator& it)
2275 {
2276     // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
2277     static const unsigned initialCapacity = 1 << 15;
2278 
2279     unsigned bufferLength = 0;
2280     StringBuilder builder;
2281     builder.reserveCapacity(initialCapacity);
2282 
2283     for (; !it.atEnd(); it.advance()) {
2284         it.appendTextToStringBuilder(builder);
2285         bufferLength += it.length();
2286     }
2287 
2288     if (!bufferLength)
2289         return emptyString();
2290 
2291     return builder.toString();
2292 }
2293 
plainText(const Range * r,TextIteratorBehaviorFlags behavior)2294 String plainText(const Range* r, TextIteratorBehaviorFlags behavior)
2295 {
2296     TextIterator it(r, behavior);
2297     return createPlainText(it);
2298 }
2299 
plainText(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)2300 String plainText(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
2301 {
2302     TextIterator it(start, end, behavior);
2303     return createPlainText(it);
2304 }
2305 
collapsedToBoundary(const Range * range,bool forward)2306 static PassRefPtrWillBeRawPtr<Range> collapsedToBoundary(const Range* range, bool forward)
2307 {
2308     RefPtrWillBeRawPtr<Range> result = range->cloneRange();
2309     result->collapse(!forward);
2310     return result.release();
2311 }
2312 
2313 // Check if there's any unpaird surrogate code point.
2314 // Non-character code points are not checked.
isValidUTF16(const String & s)2315 static bool isValidUTF16(const String& s)
2316 {
2317     if (s.is8Bit())
2318         return true;
2319     const UChar* ustr = s.characters16();
2320     size_t length = s.length();
2321     size_t position = 0;
2322     while (position < length) {
2323         UChar32 character;
2324         U16_NEXT(ustr, position, length, character);
2325         if (U_IS_SURROGATE(character))
2326             return false;
2327     }
2328     return true;
2329 }
2330 
findPlainTextInternal(CharacterIterator & it,const String & target,FindOptions options,size_t & matchStart)2331 static size_t findPlainTextInternal(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
2332 {
2333     matchStart = 0;
2334     size_t matchLength = 0;
2335 
2336     if (!isValidUTF16(target))
2337         return 0;
2338 
2339     SearchBuffer buffer(target, options);
2340 
2341     if (buffer.needsMoreContext()) {
2342         RefPtrWillBeRawPtr<Range> beforeStartRange = it.ownerDocument()->createRange();
2343         beforeStartRange->setEnd(it.startContainer(), it.startOffset(), IGNORE_EXCEPTION);
2344         for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
2345             Vector<UChar, 1024> characters;
2346             backwardsIterator.prependTextTo(characters);
2347             buffer.prependContext(characters.data(), characters.size());
2348             if (!buffer.needsMoreContext())
2349                 break;
2350         }
2351     }
2352 
2353     while (!it.atEnd()) {
2354         it.appendTextTo(buffer);
2355         it.advance(buffer.numberOfCharactersJustAppended());
2356 tryAgain:
2357         size_t matchStartOffset;
2358         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
2359             // Note that we found a match, and where we found it.
2360             size_t lastCharacterInBufferOffset = it.characterOffset();
2361             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
2362             matchStart = lastCharacterInBufferOffset - matchStartOffset;
2363             matchLength = newMatchLength;
2364             // If searching forward, stop on the first match.
2365             // If searching backward, don't stop, so we end up with the last match.
2366             if (!(options & Backwards))
2367                 break;
2368             goto tryAgain;
2369         }
2370         if (it.atBreak() && !buffer.atBreak()) {
2371             buffer.reachedBreak();
2372             goto tryAgain;
2373         }
2374     }
2375 
2376     return matchLength;
2377 }
2378 
2379 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots;
2380 
findPlainText(const Range * range,const String & target,FindOptions options)2381 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
2382 {
2383     // First, find the text.
2384     size_t matchStart;
2385     size_t matchLength;
2386     {
2387         CharacterIterator findIterator(range, iteratorFlagsForFindPlainText);
2388         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
2389         if (!matchLength)
2390             return collapsedToBoundary(range, !(options & Backwards));
2391     }
2392 
2393     // Then, find the document position of the start and the end of the text.
2394     CharacterIterator computeRangeIterator(range, iteratorFlagsForFindPlainText);
2395     Position resultStart;
2396     Position resultEnd;
2397     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
2398     return Range::create(range->ownerDocument(), resultStart, resultEnd);
2399 }
2400 
findPlainText(const Position & inputStart,const Position & inputEnd,const String & target,FindOptions options,Position & resultStart,Position & resultEnd)2401 void findPlainText(const Position& inputStart, const Position& inputEnd, const String& target, FindOptions options, Position& resultStart, Position& resultEnd)
2402 {
2403     resultStart.clear();
2404     resultEnd.clear();
2405     // CharacterIterator requires renderers to be up-to-date.
2406     if (!inputStart.inDocument())
2407         return;
2408     ASSERT(inputStart.document() == inputEnd.document());
2409 
2410     // FIXME: Reduce the code duplication with above (but how?).
2411     size_t matchStart;
2412     size_t matchLength;
2413     {
2414         CharacterIterator findIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
2415         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
2416         if (!matchLength) {
2417             const Position& collapseTo = options & Backwards ? inputStart : inputEnd;
2418             resultStart = collapseTo;
2419             resultEnd = collapseTo;
2420             return;
2421         }
2422     }
2423 
2424     CharacterIterator computeRangeIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
2425     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
2426 }
2427 
2428 }
2429