• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
3  * Copyright (C) 2005 Alexey Proskuryakov.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "core/editing/TextIterator.h"
29 
30 #include "bindings/v8/ExceptionStatePlaceholder.h"
31 #include "core/HTMLNames.h"
32 #include "core/dom/Document.h"
33 #include "core/dom/NodeTraversal.h"
34 #include "core/dom/shadow/ShadowRoot.h"
35 #include "core/editing/VisiblePosition.h"
36 #include "core/editing/VisibleUnits.h"
37 #include "core/editing/htmlediting.h"
38 #include "core/html/HTMLElement.h"
39 #include "core/html/HTMLTextFormControlElement.h"
40 #include "core/rendering/InlineTextBox.h"
41 #include "core/rendering/RenderImage.h"
42 #include "core/rendering/RenderTableCell.h"
43 #include "core/rendering/RenderTableRow.h"
44 #include "core/rendering/RenderTextControl.h"
45 #include "core/rendering/RenderTextFragment.h"
46 #include "platform/fonts/Character.h"
47 #include "platform/fonts/Font.h"
48 #include "platform/text/TextBoundaries.h"
49 #include "platform/text/TextBreakIteratorInternalICU.h"
50 #include "platform/text/UnicodeUtilities.h"
51 #include "wtf/text/CString.h"
52 #include "wtf/text/StringBuilder.h"
53 #include "wtf/unicode/CharacterNames.h"
54 #include <unicode/usearch.h>
55 #include <unicode/utf16.h>
56 
57 using namespace WTF::Unicode;
58 
59 namespace WebCore {
60 
61 using namespace HTMLNames;
62 
63 // Buffer that knows how to compare with a search target.
64 // Keeps enough of the previous text to be able to search in the future, but no more.
65 // Non-breaking spaces are always equal to normal spaces.
66 // Case folding is also done if the CaseInsensitive option is specified.
67 // Matches are further filtered if the AtWordStarts option is specified, although some
68 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
69 class SearchBuffer {
70     WTF_MAKE_NONCOPYABLE(SearchBuffer);
71 public:
72     SearchBuffer(const String& target, FindOptions);
73     ~SearchBuffer();
74 
75     // Returns number of characters appended; guaranteed to be in the range [1, length].
76     template<typename CharType>
77     void append(const CharType*, size_t length);
numberOfCharactersJustAppended() const78     size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; }
79 
80     bool needsMoreContext() const;
81     void prependContext(const UChar*, size_t length);
82     void reachedBreak();
83 
84     // Result is the size in characters of what was found.
85     // And <startOffset> is the number of characters back to the start of what was found.
86     size_t search(size_t& startOffset);
87     bool atBreak() const;
88 
89 private:
90     bool isBadMatch(const UChar*, size_t length) const;
91     bool isWordStartMatch(size_t start, size_t length) const;
92 
93     Vector<UChar> m_target;
94     FindOptions m_options;
95 
96     Vector<UChar> m_buffer;
97     size_t m_overlap;
98     size_t m_prefixLength;
99     size_t m_numberOfCharactersJustAppended;
100     bool m_atBreak;
101     bool m_needsMoreContext;
102 
103     bool m_targetRequiresKanaWorkaround;
104     Vector<UChar> m_normalizedTarget;
105     mutable Vector<UChar> m_normalizedMatch;
106 };
107 
108 // --------
109 
110 static const unsigned bitsInWord = sizeof(unsigned) * 8;
111 static const unsigned bitInWordMask = bitsInWord - 1;
112 
BitStack()113 BitStack::BitStack()
114     : m_size(0)
115 {
116 }
117 
~BitStack()118 BitStack::~BitStack()
119 {
120 }
121 
push(bool bit)122 void BitStack::push(bool bit)
123 {
124     unsigned index = m_size / bitsInWord;
125     unsigned shift = m_size & bitInWordMask;
126     if (!shift && index == m_words.size()) {
127         m_words.grow(index + 1);
128         m_words[index] = 0;
129     }
130     unsigned& word = m_words[index];
131     unsigned mask = 1U << shift;
132     if (bit)
133         word |= mask;
134     else
135         word &= ~mask;
136     ++m_size;
137 }
138 
pop()139 void BitStack::pop()
140 {
141     if (m_size)
142         --m_size;
143 }
144 
top() const145 bool BitStack::top() const
146 {
147     if (!m_size)
148         return false;
149     unsigned shift = (m_size - 1) & bitInWordMask;
150     return m_words.last() & (1U << shift);
151 }
152 
size() const153 unsigned BitStack::size() const
154 {
155     return m_size;
156 }
157 
158 // --------
159 
160 #if ASSERT_ENABLED
161 
depthCrossingShadowBoundaries(Node * node)162 static unsigned depthCrossingShadowBoundaries(Node* node)
163 {
164     unsigned depth = 0;
165     for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
166         ++depth;
167     return depth;
168 }
169 
170 #endif
171 
172 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
nextInPreOrderCrossingShadowBoundaries(Node * rangeEndContainer,int rangeEndOffset)173 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
174 {
175     if (!rangeEndContainer)
176         return 0;
177     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
178         if (Node* next = rangeEndContainer->traverseToChildAt(rangeEndOffset))
179             return next;
180     }
181     for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
182         if (Node* next = node->nextSibling())
183             return next;
184     }
185     return 0;
186 }
187 
188 // --------
189 
fullyClipsContents(Node * node)190 static inline bool fullyClipsContents(Node* node)
191 {
192     RenderObject* renderer = node->renderer();
193     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
194         return false;
195     return toRenderBox(renderer)->size().isEmpty();
196 }
197 
ignoresContainerClip(Node * node)198 static inline bool ignoresContainerClip(Node* node)
199 {
200     RenderObject* renderer = node->renderer();
201     if (!renderer || renderer->isText())
202         return false;
203     return renderer->style()->hasOutOfFlowPosition();
204 }
205 
pushFullyClippedState(BitStack & stack,Node * node)206 static void pushFullyClippedState(BitStack& stack, Node* node)
207 {
208     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
209 
210     // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364>
211     // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if
212     // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches
213     // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics
214     // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox).
215     //
216     // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering
217     // primitives, not with DOM primitives.
218 
219     // Push true if this node full clips its contents, or if a parent already has fully
220     // clipped and this is not a node that ignores its container's clip.
221     stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
222 }
223 
setUpFullyClippedStack(BitStack & stack,Node * node)224 static void setUpFullyClippedStack(BitStack& stack, Node* node)
225 {
226     // Put the nodes in a vector so we can iterate in reverse order.
227     Vector<Node*, 100> ancestry;
228     for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
229         ancestry.append(parent);
230 
231     // Call pushFullyClippedState on each node starting with the earliest ancestor.
232     size_t size = ancestry.size();
233     for (size_t i = 0; i < size; ++i)
234         pushFullyClippedState(stack, ancestry[size - i - 1]);
235     pushFullyClippedState(stack, node);
236 
237     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
238 }
239 
240 // --------
241 
TextIterator(const Range * range,TextIteratorBehaviorFlags behavior)242 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior)
243     : m_startContainer(nullptr)
244     , m_startOffset(0)
245     , m_endContainer(nullptr)
246     , m_endOffset(0)
247     , m_positionNode(nullptr)
248     , m_textLength(0)
249     , m_needsAnotherNewline(false)
250     , m_textBox(0)
251     , m_remainingTextBox(0)
252     , m_firstLetterText(0)
253     , m_lastTextNode(nullptr)
254     , m_lastTextNodeEndedWithCollapsedSpace(false)
255     , m_lastCharacter(0)
256     , m_sortedTextBoxesPosition(0)
257     , m_hasEmitted(false)
258     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
259     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
260     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
261     , m_handledFirstLetter(false)
262     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
263     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
264     , m_shouldStop(false)
265     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
266     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
267     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
268 {
269     if (range)
270         initialize(range->startPosition(), range->endPosition());
271 }
272 
TextIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)273 TextIterator::TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
274     : m_startContainer(nullptr)
275     , m_startOffset(0)
276     , m_endContainer(nullptr)
277     , m_endOffset(0)
278     , m_positionNode(nullptr)
279     , m_textLength(0)
280     , m_needsAnotherNewline(false)
281     , m_textBox(0)
282     , m_remainingTextBox(0)
283     , m_firstLetterText(0)
284     , m_lastTextNode(nullptr)
285     , m_lastTextNodeEndedWithCollapsedSpace(false)
286     , m_lastCharacter(0)
287     , m_sortedTextBoxesPosition(0)
288     , m_hasEmitted(false)
289     , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
290     , m_entersTextControls(behavior & TextIteratorEntersTextControls)
291     , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
292     , m_handledFirstLetter(false)
293     , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
294     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
295     , m_shouldStop(false)
296     , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
297     , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
298     , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
299 {
300     initialize(start, end);
301 }
302 
initialize(const Position & start,const Position & end)303 void TextIterator::initialize(const Position& start, const Position& end)
304 {
305     ASSERT(comparePositions(start, end) <= 0);
306 
307     // Get and validate |start| and |end|.
308     Node* startContainer = start.containerNode();
309     if (!startContainer)
310         return;
311     int startOffset = start.computeOffsetInContainerNode();
312     Node* endContainer = end.containerNode();
313     if (!endContainer)
314         return;
315     int endOffset = end.computeOffsetInContainerNode();
316 
317     // Remember the range - this does not change.
318     m_startContainer = startContainer;
319     m_startOffset = startOffset;
320     m_endContainer = endContainer;
321     m_endOffset = endOffset;
322 
323     // Figure out the initial value of m_shadowDepth: the depth of startContainer's tree scope from
324     // the common ancestor tree scope.
325     const TreeScope* commonAncestorTreeScope = startContainer->treeScope().commonAncestorTreeScope(endContainer->treeScope());
326     ASSERT(commonAncestorTreeScope);
327     m_shadowDepth = 0;
328     for (const TreeScope* treeScope = &startContainer->treeScope(); treeScope != commonAncestorTreeScope; treeScope = treeScope->parentTreeScope())
329         ++m_shadowDepth;
330 
331     // Set up the current node for processing.
332     if (startContainer->offsetInCharacters())
333         m_node = startContainer;
334     else if (Node* child = startContainer->traverseToChildAt(startOffset))
335         m_node = child;
336     else if (!startOffset)
337         m_node = startContainer;
338     else
339         m_node = NodeTraversal::nextSkippingChildren(*startContainer);
340 
341     if (!m_node)
342         return;
343 
344     setUpFullyClippedStack(m_fullyClippedStack, m_node);
345     m_offset = m_node == m_startContainer ? m_startOffset : 0;
346     m_iterationProgress = HandledNone;
347 
348     // Calculate first out of bounds node.
349     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
350 
351     // Identify the first run.
352     advance();
353 }
354 
~TextIterator()355 TextIterator::~TextIterator()
356 {
357 }
358 
advance()359 void TextIterator::advance()
360 {
361     if (m_shouldStop)
362         return;
363 
364     // reset the run information
365     m_positionNode = nullptr;
366     m_textLength = 0;
367 
368     // handle remembered node that needed a newline after the text node's newline
369     if (m_needsAnotherNewline) {
370         // Emit the extra newline, and position it *inside* m_node, after m_node's
371         // contents, in case it's a block, in the same way that we position the first
372         // newline. The range for the emitted newline should start where the line
373         // break begins.
374         // FIXME: It would be cleaner if we emitted two newlines during the last
375         // iteration, instead of using m_needsAnotherNewline.
376         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
377         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
378         m_needsAnotherNewline = false;
379         return;
380     }
381 
382     if (!m_textBox && m_remainingTextBox) {
383         m_textBox = m_remainingTextBox;
384         m_remainingTextBox = 0;
385         m_firstLetterText = 0;
386         m_offset = 0;
387     }
388     // handle remembered text box
389     if (m_textBox) {
390         handleTextBox();
391         if (m_positionNode)
392             return;
393     }
394 
395     while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) {
396         if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node))
397             m_shouldStop = true;
398 
399         // if the range ends at offset 0 of an element, represent the
400         // position, but not the content, of that element e.g. if the
401         // node is a blockflow element, emit a newline that
402         // precedes the element
403         if (m_node == m_endContainer && !m_endOffset) {
404             representNodeOffsetZero();
405             m_node = nullptr;
406             return;
407         }
408 
409         RenderObject* renderer = m_node->renderer();
410         if (!renderer) {
411             if (m_node->isShadowRoot()) {
412                 // A shadow root doesn't have a renderer, but we want to visit children anyway.
413                 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress;
414             } else {
415                 m_iterationProgress = HandledChildren;
416             }
417         } else {
418             // Enter author shadow roots, from youngest, if any and if necessary.
419             if (m_iterationProgress < HandledAuthorShadowRoots) {
420                 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) {
421                     ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot();
422                     ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot);
423                     m_node = youngestShadowRoot;
424                     m_iterationProgress = HandledNone;
425                     ++m_shadowDepth;
426                     pushFullyClippedState(m_fullyClippedStack, m_node);
427                     continue;
428                 }
429 
430                 m_iterationProgress = HandledAuthorShadowRoots;
431             }
432 
433             // Enter user-agent shadow root, if necessary.
434             if (m_iterationProgress < HandledUserAgentShadowRoot) {
435                 if (m_entersTextControls && renderer->isTextControl()) {
436                     ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot();
437                     ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
438                     m_node = userAgentShadowRoot;
439                     m_iterationProgress = HandledNone;
440                     ++m_shadowDepth;
441                     pushFullyClippedState(m_fullyClippedStack, m_node);
442                     continue;
443                 }
444                 m_iterationProgress = HandledUserAgentShadowRoot;
445             }
446 
447             // Handle the current node according to its type.
448             if (m_iterationProgress < HandledNode) {
449                 bool handledNode = false;
450                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE?
451                     handledNode = handleTextNode();
452                 } else if (renderer && (renderer->isImage() || renderer->isWidget()
453                     || (m_node && m_node->isElementNode()
454                     && (toElement(m_node)->isFormControlElement()
455                     || isHTMLLegendElement(toElement(*m_node))
456                     || isHTMLMeterElement(toElement(*m_node))
457                     || isHTMLProgressElement(toElement(*m_node)))))) {
458                     handledNode = handleReplacedElement();
459                 } else {
460                     handledNode = handleNonTextNode();
461                 }
462                 if (handledNode)
463                     m_iterationProgress = HandledNode;
464                 if (m_positionNode)
465                     return;
466             }
467         }
468 
469         // Find a new current node to handle in depth-first manner,
470         // calling exitNode() as we come back thru a parent node.
471         //
472         // 1. Iterate over child nodes, if we haven't done yet.
473         Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0;
474         m_offset = 0;
475         if (!next) {
476             // 2. If we've already iterated children or they are not available, go to the next sibling node.
477             next = m_node->nextSibling();
478             if (!next) {
479                 // 3. If we are at the last child, go up the node tree until we find a next sibling.
480                 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode;
481                 Node* parentNode = m_node->parentNode();
482                 while (!next && parentNode) {
483                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
484                         return;
485                     bool haveRenderer = m_node->renderer();
486                     m_node = parentNode;
487                     m_fullyClippedStack.pop();
488                     parentNode = m_node->parentNode();
489                     if (haveRenderer)
490                         exitNode();
491                     if (m_positionNode) {
492                         m_iterationProgress = HandledChildren;
493                         return;
494                     }
495                     next = m_node->nextSibling();
496                 }
497 
498                 if (!next && !parentNode && m_shadowDepth > 0) {
499                     // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next
500                     // sibling shadow root, if any.
501                     ShadowRoot* shadowRoot = toShadowRoot(m_node);
502                     if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
503                         ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot();
504                         if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
505                             m_fullyClippedStack.pop();
506                             m_node = nextShadowRoot;
507                             m_iterationProgress = HandledNone;
508                             // m_shadowDepth is unchanged since we exit from a shadow root and enter another.
509                             pushFullyClippedState(m_fullyClippedStack, m_node);
510                         } else {
511                             // We are the last shadow root; exit from here and go back to where we were.
512                             m_node = shadowRoot->host();
513                             m_iterationProgress = HandledAuthorShadowRoots;
514                             --m_shadowDepth;
515                             m_fullyClippedStack.pop();
516                         }
517                     } else {
518                         // If we are in a user-agent shadow root, then go back to the host.
519                         ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
520                         m_node = shadowRoot->host();
521                         m_iterationProgress = HandledUserAgentShadowRoot;
522                         --m_shadowDepth;
523                         m_fullyClippedStack.pop();
524                     }
525                     m_handledFirstLetter = false;
526                     m_firstLetterText = 0;
527                     continue;
528                 }
529             }
530             m_fullyClippedStack.pop();
531         }
532 
533         // set the new current node
534         m_node = next;
535         if (m_node)
536             pushFullyClippedState(m_fullyClippedStack, m_node);
537         m_iterationProgress = HandledNone;
538         m_handledFirstLetter = false;
539         m_firstLetterText = 0;
540 
541         // how would this ever be?
542         if (m_positionNode)
543             return;
544     }
545 }
546 
characterAt(unsigned index) const547 UChar TextIterator::characterAt(unsigned index) const
548 {
549     ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length()));
550     if (!(index < static_cast<unsigned>(length())))
551         return 0;
552 
553     if (m_singleCharacterBuffer) {
554         ASSERT(!index);
555         ASSERT(length() == 1);
556         return m_singleCharacterBuffer;
557     }
558 
559     return string()[startOffset() + index];
560 }
561 
substring(unsigned position,unsigned length) const562 String TextIterator::substring(unsigned position, unsigned length) const
563 {
564     ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length()));
565     ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length()));
566     if (!length)
567         return emptyString();
568     if (m_singleCharacterBuffer) {
569         ASSERT(!position);
570         ASSERT(length == 1);
571         return String(&m_singleCharacterBuffer, 1);
572     }
573     return string().substring(startOffset() + position, length);
574 }
575 
appendTextToStringBuilder(StringBuilder & builder,unsigned position,unsigned maxLength) const576 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const
577 {
578     unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength);
579     if (!lengthToAppend)
580         return;
581     if (m_singleCharacterBuffer) {
582         ASSERT(!position);
583         builder.append(m_singleCharacterBuffer);
584     } else {
585         builder.append(string(), startOffset() + position, lengthToAppend);
586     }
587 }
588 
handleTextNode()589 bool TextIterator::handleTextNode()
590 {
591     if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
592         return false;
593 
594     RenderText* renderer = toRenderText(m_node->renderer());
595 
596     m_lastTextNode = m_node;
597     String str = renderer->text();
598 
599     // handle pre-formatted text
600     if (!renderer->style()->collapseWhiteSpace()) {
601         int runStart = m_offset;
602         if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
603             emitCharacter(' ', m_node, 0, runStart, runStart);
604             return false;
605         }
606         if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) {
607             handleTextNodeFirstLetter(toRenderTextFragment(renderer));
608             if (m_firstLetterText) {
609                 String firstLetter = m_firstLetterText->text();
610                 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length());
611                 m_firstLetterText = 0;
612                 m_textBox = 0;
613                 return false;
614             }
615         }
616         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
617             return false;
618         int strLength = str.length();
619         int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
620         int runEnd = std::min(strLength, end);
621 
622         if (runStart >= runEnd)
623             return true;
624 
625         emitText(m_node, runStart, runEnd);
626         return true;
627     }
628 
629     if (renderer->firstTextBox())
630         m_textBox = renderer->firstTextBox();
631 
632     bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset;
633     if (shouldHandleFirstLetter)
634         handleTextNodeFirstLetter(toRenderTextFragment(renderer));
635 
636     if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) {
637         if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
638             return false;
639         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
640         return true;
641     }
642 
643     if (m_firstLetterText)
644         renderer = m_firstLetterText;
645 
646     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
647     if (renderer->containsReversedText()) {
648         m_sortedTextBoxes.clear();
649         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
650             m_sortedTextBoxes.append(textBox);
651         }
652         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
653         m_sortedTextBoxesPosition = 0;
654         m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0];
655     }
656 
657     handleTextBox();
658     return true;
659 }
660 
handleTextBox()661 void TextIterator::handleTextBox()
662 {
663     RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer());
664     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
665         m_textBox = 0;
666         return;
667     }
668     String str = renderer->text();
669     unsigned start = m_offset;
670     unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX;
671     while (m_textBox) {
672         unsigned textBoxStart = m_textBox->start();
673         unsigned runStart = std::max(textBoxStart, start);
674 
675         // Check for collapsed space at the start of this run.
676         InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
677         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
678             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
679         if (needSpace && !renderer->style()->isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) {
680             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
681                 unsigned spaceRunStart = runStart - 1;
682                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
683                     --spaceRunStart;
684                 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
685             } else {
686                 emitCharacter(' ', m_node, 0, runStart, runStart);
687             }
688             return;
689         }
690         unsigned textBoxEnd = textBoxStart + m_textBox->len();
691         unsigned runEnd = std::min(textBoxEnd, end);
692 
693         // Determine what the next text box will be, but don't advance yet
694         InlineTextBox* nextTextBox = 0;
695         if (renderer->containsReversedText()) {
696             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
697                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
698         } else {
699             nextTextBox = m_textBox->nextTextBox();
700         }
701         ASSERT(!nextTextBox || nextTextBox->renderer() == renderer);
702 
703         if (runStart < runEnd) {
704             // Handle either a single newline character (which becomes a space),
705             // or a run of characters that does not include a newline.
706             // This effectively translates newlines to spaces without copying the text.
707             if (str[runStart] == '\n') {
708                 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
709                 m_offset = runStart + 1;
710             } else {
711                 size_t subrunEnd = str.find('\n', runStart);
712                 if (subrunEnd == kNotFound || subrunEnd > runEnd)
713                     subrunEnd = runEnd;
714 
715                 m_offset = subrunEnd;
716                 emitText(m_node, renderer, runStart, subrunEnd);
717             }
718 
719             // If we are doing a subrun that doesn't go to the end of the text box,
720             // come back again to finish handling this text box; don't advance to the next one.
721             if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
722                 return;
723 
724             // Advance and return
725             unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
726             if (nextRunStart > runEnd)
727                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
728             m_textBox = nextTextBox;
729             if (renderer->containsReversedText())
730                 ++m_sortedTextBoxesPosition;
731             return;
732         }
733         // Advance and continue
734         m_textBox = nextTextBox;
735         if (renderer->containsReversedText())
736             ++m_sortedTextBoxesPosition;
737     }
738     if (!m_textBox && m_remainingTextBox) {
739         m_textBox = m_remainingTextBox;
740         m_remainingTextBox = 0;
741         m_firstLetterText = 0;
742         m_offset = 0;
743         handleTextBox();
744     }
745 }
746 
firstRenderTextInFirstLetter(RenderBoxModelObject * firstLetter)747 static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter)
748 {
749     if (!firstLetter)
750         return 0;
751 
752     // FIXME: Should this check descendent objects?
753     for (RenderObject* current = firstLetter->slowFirstChild(); current; current = current->nextSibling()) {
754         if (current->isText())
755             return toRenderText(current);
756     }
757     return 0;
758 }
759 
handleTextNodeFirstLetter(RenderTextFragment * renderer)760 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
761 {
762     if (renderer->firstLetter()) {
763         RenderBoxModelObject* r = renderer->firstLetter();
764         if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
765             return;
766         if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) {
767             m_handledFirstLetter = true;
768             m_remainingTextBox = m_textBox;
769             m_textBox = firstLetter->firstTextBox();
770             m_sortedTextBoxes.clear();
771             m_firstLetterText = firstLetter;
772         }
773     }
774     m_handledFirstLetter = true;
775 }
776 
handleReplacedElement()777 bool TextIterator::handleReplacedElement()
778 {
779     if (m_fullyClippedStack.top())
780         return false;
781 
782     RenderObject* renderer = m_node->renderer();
783     if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
784         return false;
785 
786     if (m_emitsObjectReplacementCharacter) {
787         emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
788         return true;
789     }
790 
791     if (m_lastTextNodeEndedWithCollapsedSpace) {
792         emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
793         return false;
794     }
795 
796     if (m_entersTextControls && renderer->isTextControl()) {
797         // The shadow tree should be already visited.
798         return true;
799     }
800 
801     m_hasEmitted = true;
802 
803     if (m_emitsCharactersBetweenAllVisiblePositions) {
804         // We want replaced elements to behave like punctuation for boundary
805         // finding, and to simply take up space for the selection preservation
806         // code in moveParagraphs, so we use a comma.
807         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
808         return true;
809     }
810 
811     m_positionNode = m_node->parentNode();
812     m_positionOffsetBaseNode = m_node;
813     m_positionStartOffset = 0;
814     m_positionEndOffset = 1;
815     m_singleCharacterBuffer = 0;
816 
817     if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) {
818         m_text = toRenderImage(renderer)->altText();
819         if (!m_text.isEmpty()) {
820             m_textLength = m_text.length();
821             m_lastCharacter = m_text[m_textLength - 1];
822             return true;
823         }
824     }
825 
826     m_textLength = 0;
827     m_lastCharacter = 0;
828 
829     return true;
830 }
831 
hasVisibleTextNode(RenderText * renderer)832 bool TextIterator::hasVisibleTextNode(RenderText* renderer)
833 {
834     if (renderer->style()->visibility() == VISIBLE)
835         return true;
836     if (renderer->isTextFragment()) {
837         RenderTextFragment* fragment = toRenderTextFragment(renderer);
838         if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
839             return true;
840     }
841     return false;
842 }
843 
shouldEmitTabBeforeNode(Node * node)844 static bool shouldEmitTabBeforeNode(Node* node)
845 {
846     RenderObject* r = node->renderer();
847 
848     // Table cells are delimited by tabs.
849     if (!r || !isTableCell(node))
850         return false;
851 
852     // Want a tab before every cell other than the first one
853     RenderTableCell* rc = toRenderTableCell(r);
854     RenderTable* t = rc->table();
855     return t && (t->cellBefore(rc) || t->cellAbove(rc));
856 }
857 
shouldEmitNewlineForNode(Node * node,bool emitsOriginalText)858 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
859 {
860     RenderObject* renderer = node->renderer();
861 
862     if (renderer ? !renderer->isBR() : !isHTMLBRElement(node))
863         return false;
864     return emitsOriginalText || !(node->isInShadowTree() && isHTMLInputElement(*node->shadowHost()));
865 }
866 
shouldEmitNewlinesBeforeAndAfterNode(Node & node)867 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node)
868 {
869     // Block flow (versus inline flow) is represented by having
870     // a newline both before and after the element.
871     RenderObject* r = node.renderer();
872     if (!r) {
873         return (node.hasTagName(blockquoteTag)
874             || node.hasTagName(ddTag)
875             || node.hasTagName(divTag)
876             || node.hasTagName(dlTag)
877             || node.hasTagName(dtTag)
878             || node.hasTagName(h1Tag)
879             || node.hasTagName(h2Tag)
880             || node.hasTagName(h3Tag)
881             || node.hasTagName(h4Tag)
882             || node.hasTagName(h5Tag)
883             || node.hasTagName(h6Tag)
884             || node.hasTagName(hrTag)
885             || node.hasTagName(liTag)
886             || node.hasTagName(listingTag)
887             || node.hasTagName(olTag)
888             || node.hasTagName(pTag)
889             || node.hasTagName(preTag)
890             || node.hasTagName(trTag)
891             || node.hasTagName(ulTag));
892     }
893 
894     // Need to make an exception for table cells, because they are blocks, but we
895     // want them tab-delimited rather than having newlines before and after.
896     if (isTableCell(&node))
897         return false;
898 
899     // Need to make an exception for table row elements, because they are neither
900     // "inline" or "RenderBlock", but we want newlines for them.
901     if (r->isTableRow()) {
902         RenderTable* t = toRenderTableRow(r)->table();
903         if (t && !t->isInline())
904             return true;
905     }
906 
907     return !r->isInline() && r->isRenderBlock()
908         && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText();
909 }
910 
shouldEmitNewlineAfterNode(Node & node)911 static bool shouldEmitNewlineAfterNode(Node& node)
912 {
913     // FIXME: It should be better but slower to create a VisiblePosition here.
914     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
915         return false;
916     // Check if this is the very last renderer in the document.
917     // If so, then we should not emit a newline.
918     Node* next = &node;
919     while ((next = NodeTraversal::nextSkippingChildren(*next))) {
920         if (next->renderer())
921             return true;
922     }
923     return false;
924 }
925 
shouldEmitNewlineBeforeNode(Node & node)926 static bool shouldEmitNewlineBeforeNode(Node& node)
927 {
928     return shouldEmitNewlinesBeforeAndAfterNode(node);
929 }
930 
shouldEmitExtraNewlineForNode(Node * node)931 static bool shouldEmitExtraNewlineForNode(Node* node)
932 {
933     // When there is a significant collapsed bottom margin, emit an extra
934     // newline for a more realistic result. We end up getting the right
935     // result even without margin collapsing. For example: <div><p>text</p></div>
936     // will work right even if both the <div> and the <p> have bottom margins.
937     RenderObject* r = node->renderer();
938     if (!r || !r->isBox())
939         return false;
940 
941     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
942     // not to do this at all
943     if (node->hasTagName(h1Tag)
944         || node->hasTagName(h2Tag)
945         || node->hasTagName(h3Tag)
946         || node->hasTagName(h4Tag)
947         || node->hasTagName(h5Tag)
948         || node->hasTagName(h6Tag)
949         || node->hasTagName(pTag)) {
950         RenderStyle* style = r->style();
951         if (style) {
952             int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
953             int fontSize = style->fontDescription().computedPixelSize();
954             if (bottomMargin * 2 >= fontSize)
955                 return true;
956         }
957     }
958 
959     return false;
960 }
961 
collapsedSpaceLength(RenderText * renderer,int textEnd)962 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
963 {
964     const String& text = renderer->text();
965     int length = text.length();
966     for (int i = textEnd; i < length; ++i) {
967         if (!renderer->style()->isCollapsibleWhiteSpace(text[i]))
968             return i - textEnd;
969     }
970 
971     return length - textEnd;
972 }
973 
maxOffsetIncludingCollapsedSpaces(Node * node)974 static int maxOffsetIncludingCollapsedSpaces(Node* node)
975 {
976     int offset = caretMaxOffset(node);
977 
978     if (node->renderer() && node->renderer()->isText())
979         offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
980 
981     return offset;
982 }
983 
984 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
shouldRepresentNodeOffsetZero()985 bool TextIterator::shouldRepresentNodeOffsetZero()
986 {
987     if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTable(m_node))
988         return true;
989 
990     // Leave element positioned flush with start of a paragraph
991     // (e.g. do not insert tab before a table cell at the start of a paragraph)
992     if (m_lastCharacter == '\n')
993         return false;
994 
995     // Otherwise, show the position if we have emitted any characters
996     if (m_hasEmitted)
997         return true;
998 
999     // We've not emitted anything yet. Generally, there is no need for any positioning then.
1000     // The only exception is when the element is visually not in the same line as
1001     // the start of the range (e.g. the range starts at the end of the previous paragraph).
1002     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
1003     // make quicker checks to possibly avoid that. Another check that we could make is
1004     // is whether the inline vs block flow changed since the previous visible element.
1005     // I think we're already in a special enough case that that won't be needed, tho.
1006 
1007     // No character needed if this is the first node in the range.
1008     if (m_node == m_startContainer)
1009         return false;
1010 
1011     // If we are outside the start container's subtree, assume we need to emit.
1012     // FIXME: m_startContainer could be an inline block
1013     if (!m_node->isDescendantOf(m_startContainer))
1014         return true;
1015 
1016     // If we started as m_startContainer offset 0 and the current node is a descendant of
1017     // the start container, we already had enough context to correctly decide whether to
1018     // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
1019     // so don't second guess that now.
1020     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
1021     // immaterial since we likely would have already emitted something by now.
1022     if (!m_startOffset)
1023         return false;
1024 
1025     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
1026     // Additionally, if the range we are iterating over contains huge sections of unrendered content,
1027     // we would create VisiblePositions on every call to this function without this check.
1028     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE
1029         || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !isHTMLBodyElement(*m_node)))
1030         return false;
1031 
1032     // The startPos.isNotNull() check is needed because the start could be before the body,
1033     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
1034     // The currPos.isNotNull() check is needed because positions in non-HTML content
1035     // (like SVG) do not have visible positions, and we don't want to emit for them either.
1036     VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
1037     VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
1038     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
1039 }
1040 
shouldEmitSpaceBeforeAndAfterNode(Node * node)1041 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
1042 {
1043     return isRenderedTable(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
1044 }
1045 
representNodeOffsetZero()1046 void TextIterator::representNodeOffsetZero()
1047 {
1048     // Emit a character to show the positioning of m_node.
1049 
1050     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
1051     // create VisiblePositions, which is expensive. So, we perform the inexpensive checks
1052     // on m_node to see if it necessitates emitting a character first and will early return
1053     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
1054     if (shouldEmitTabBeforeNode(m_node)) {
1055         if (shouldRepresentNodeOffsetZero())
1056             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
1057     } else if (shouldEmitNewlineBeforeNode(*m_node)) {
1058         if (shouldRepresentNodeOffsetZero())
1059             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
1060     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
1061         if (shouldRepresentNodeOffsetZero())
1062             emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
1063     }
1064 }
1065 
handleNonTextNode()1066 bool TextIterator::handleNonTextNode()
1067 {
1068     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText))
1069         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
1070     else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
1071         emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
1072     else
1073         representNodeOffsetZero();
1074 
1075     return true;
1076 }
1077 
exitNode()1078 void TextIterator::exitNode()
1079 {
1080     // prevent emitting a newline when exiting a collapsed block at beginning of the range
1081     // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
1082     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
1083     // therefore look like a blank line.
1084     if (!m_hasEmitted)
1085         return;
1086 
1087     // Emit with a position *inside* m_node, after m_node's contents, in
1088     // case it is a block, because the run should start where the
1089     // emitted character is positioned visually.
1090     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
1091     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
1092     // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use
1093     // TextIterator in _web_attributedStringFromRange.
1094     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
1095     if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) {
1096         // use extra newline to represent margin bottom, as needed
1097         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
1098 
1099         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
1100         // contain a VisiblePosition when doing selection preservation.
1101         if (m_lastCharacter != '\n') {
1102             // insert a newline with a position following this block's contents.
1103             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
1104             // remember whether to later add a newline for the current node
1105             ASSERT(!m_needsAnotherNewline);
1106             m_needsAnotherNewline = addNewline;
1107         } else if (addNewline) {
1108             // insert a newline with a position following this block's contents.
1109             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
1110         }
1111     }
1112 
1113     // If nothing was emitted, see if we need to emit a space.
1114     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
1115         emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
1116 }
1117 
emitCharacter(UChar c,Node * textNode,Node * offsetBaseNode,int textStartOffset,int textEndOffset)1118 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
1119 {
1120     m_hasEmitted = true;
1121 
1122     // remember information with which to construct the TextIterator::range()
1123     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
1124     m_positionNode = textNode;
1125     m_positionOffsetBaseNode = offsetBaseNode;
1126     m_positionStartOffset = textStartOffset;
1127     m_positionEndOffset = textEndOffset;
1128 
1129     // remember information with which to construct the TextIterator::characters() and length()
1130     m_singleCharacterBuffer = c;
1131     ASSERT(m_singleCharacterBuffer);
1132     m_textLength = 1;
1133 
1134     // remember some iteration state
1135     m_lastTextNodeEndedWithCollapsedSpace = false;
1136     m_lastCharacter = c;
1137 }
1138 
emitText(Node * textNode,RenderObject * renderObject,int textStartOffset,int textEndOffset)1139 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset)
1140 {
1141     RenderText* renderer = toRenderText(renderObject);
1142     m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text();
1143     ASSERT(!m_text.isEmpty());
1144     ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length()));
1145     ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length()));
1146     ASSERT(textStartOffset <= textEndOffset);
1147 
1148     m_positionNode = textNode;
1149     m_positionOffsetBaseNode = nullptr;
1150     m_positionStartOffset = textStartOffset;
1151     m_positionEndOffset = textEndOffset;
1152     m_singleCharacterBuffer = 0;
1153     m_textLength = textEndOffset - textStartOffset;
1154     m_lastCharacter = m_text[textEndOffset - 1];
1155 
1156     m_lastTextNodeEndedWithCollapsedSpace = false;
1157     m_hasEmitted = true;
1158 }
1159 
emitText(Node * textNode,int textStartOffset,int textEndOffset)1160 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
1161 {
1162     emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset);
1163 }
1164 
range() const1165 PassRefPtrWillBeRawPtr<Range> TextIterator::range() const
1166 {
1167     // use the current run information, if we have it
1168     if (m_positionNode) {
1169         if (m_positionOffsetBaseNode) {
1170             int index = m_positionOffsetBaseNode->nodeIndex();
1171             m_positionStartOffset += index;
1172             m_positionEndOffset += index;
1173             m_positionOffsetBaseNode = nullptr;
1174         }
1175         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1176     }
1177 
1178     // otherwise, return the end of the overall range we were given
1179     if (m_endContainer)
1180         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
1181 
1182     return nullptr;
1183 }
1184 
node() const1185 Node* TextIterator::node() const
1186 {
1187     RefPtrWillBeRawPtr<Range> textRange = range();
1188     if (!textRange)
1189         return 0;
1190 
1191     Node* node = textRange->startContainer();
1192     if (!node)
1193         return 0;
1194     if (node->offsetInCharacters())
1195         return node;
1196 
1197     return node->traverseToChildAt(textRange->startOffset());
1198 }
1199 
1200 // --------
1201 
SimplifiedBackwardsTextIterator(const Range * r,TextIteratorBehaviorFlags behavior)1202 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior)
1203     : m_node(nullptr)
1204     , m_offset(0)
1205     , m_handledNode(false)
1206     , m_handledChildren(false)
1207     , m_startNode(nullptr)
1208     , m_startOffset(0)
1209     , m_endNode(nullptr)
1210     , m_endOffset(0)
1211     , m_positionNode(nullptr)
1212     , m_positionStartOffset(0)
1213     , m_positionEndOffset(0)
1214     , m_textOffset(0)
1215     , m_textLength(0)
1216     , m_lastTextNode(nullptr)
1217     , m_lastCharacter(0)
1218     , m_singleCharacterBuffer(0)
1219     , m_havePassedStartNode(false)
1220     , m_shouldHandleFirstLetter(false)
1221     , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
1222     , m_shouldStop(false)
1223     , m_emitsOriginalText(false)
1224 {
1225     ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
1226 
1227     if (!r)
1228         return;
1229 
1230     Node* startNode = r->startContainer();
1231     if (!startNode)
1232         return;
1233     Node* endNode = r->endContainer();
1234     int startOffset = r->startOffset();
1235     int endOffset = r->endOffset();
1236 
1237     if (!startNode->offsetInCharacters() && startOffset >= 0) {
1238         // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior
1239         // instead of calling countChildren() to avoid traversing the children twice.
1240         if (Node* childAtOffset = startNode->traverseToChildAt(startOffset)) {
1241             startNode = childAtOffset;
1242             startOffset = 0;
1243         }
1244     }
1245     if (!endNode->offsetInCharacters() && endOffset > 0) {
1246         // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior
1247         // instead of calling countChildren() to avoid traversing the children twice.
1248         if (Node* childAtOffset = endNode->traverseToChildAt(endOffset - 1)) {
1249             endNode = childAtOffset;
1250             endOffset = lastOffsetInNode(endNode);
1251         }
1252     }
1253 
1254     m_node = endNode;
1255     setUpFullyClippedStack(m_fullyClippedStack, m_node);
1256     m_offset = endOffset;
1257     m_handledNode = false;
1258     m_handledChildren = !endOffset;
1259 
1260     m_startNode = startNode;
1261     m_startOffset = startOffset;
1262     m_endNode = endNode;
1263     m_endOffset = endOffset;
1264 
1265 #ifndef NDEBUG
1266     // Need this just because of the assert.
1267     m_positionNode = endNode;
1268 #endif
1269 
1270     m_lastTextNode = nullptr;
1271     m_lastCharacter = '\n';
1272 
1273     m_havePassedStartNode = false;
1274 
1275     advance();
1276 }
1277 
advance()1278 void SimplifiedBackwardsTextIterator::advance()
1279 {
1280     ASSERT(m_positionNode);
1281 
1282     if (m_shouldStop)
1283         return;
1284 
1285     if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) {
1286         m_shouldStop = true;
1287         return;
1288     }
1289 
1290     m_positionNode = nullptr;
1291     m_textLength = 0;
1292 
1293     while (m_node && !m_havePassedStartNode) {
1294         // Don't handle node if we start iterating at [node, 0].
1295         if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) {
1296             RenderObject* renderer = m_node->renderer();
1297             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
1298                 // FIXME: What about CDATA_SECTION_NODE?
1299                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1300                     m_handledNode = handleTextNode();
1301             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
1302                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1303                     m_handledNode = handleReplacedElement();
1304             } else {
1305                 m_handledNode = handleNonTextNode();
1306             }
1307             if (m_positionNode)
1308                 return;
1309         }
1310 
1311         if (!m_handledChildren && m_node->hasChildren()) {
1312             m_node = m_node->lastChild();
1313             pushFullyClippedState(m_fullyClippedStack, m_node);
1314         } else {
1315             // Exit empty containers as we pass over them or containers
1316             // where [container, 0] is where we started iterating.
1317             if (!m_handledNode
1318                 && canHaveChildrenForEditing(m_node)
1319                 && m_node->parentNode()
1320                 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
1321                 exitNode();
1322                 if (m_positionNode) {
1323                     m_handledNode = true;
1324                     m_handledChildren = true;
1325                     return;
1326                 }
1327             }
1328 
1329             // Exit all other containers.
1330             while (!m_node->previousSibling()) {
1331                 if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
1332                     break;
1333                 m_fullyClippedStack.pop();
1334                 exitNode();
1335                 if (m_positionNode) {
1336                     m_handledNode = true;
1337                     m_handledChildren = true;
1338                     return;
1339                 }
1340             }
1341 
1342             m_fullyClippedStack.pop();
1343             if (advanceRespectingRange(m_node->previousSibling()))
1344                 pushFullyClippedState(m_fullyClippedStack, m_node);
1345             else
1346                 m_node = nullptr;
1347         }
1348 
1349         // For the purpose of word boundary detection,
1350         // we should iterate all visible text and trailing (collapsed) whitespaces.
1351         m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
1352         m_handledNode = false;
1353         m_handledChildren = false;
1354 
1355         if (m_positionNode)
1356             return;
1357     }
1358 }
1359 
handleTextNode()1360 bool SimplifiedBackwardsTextIterator::handleTextNode()
1361 {
1362     m_lastTextNode = m_node;
1363 
1364     int startOffset;
1365     int offsetInNode;
1366     RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
1367     if (!renderer)
1368         return true;
1369 
1370     String text = renderer->text();
1371     if (!renderer->firstTextBox() && text.length() > 0)
1372         return true;
1373 
1374     m_positionEndOffset = m_offset;
1375     m_offset = startOffset + offsetInNode;
1376     m_positionNode = m_node;
1377     m_positionStartOffset = m_offset;
1378 
1379     ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length()));
1380     ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
1381     ASSERT(m_positionStartOffset <= m_positionEndOffset);
1382 
1383     m_textLength = m_positionEndOffset - m_positionStartOffset;
1384     m_textOffset = m_positionStartOffset - offsetInNode;
1385     m_textContainer = text;
1386     m_singleCharacterBuffer = 0;
1387     RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length());
1388 
1389     m_lastCharacter = text[m_positionEndOffset - 1];
1390 
1391     return !m_shouldHandleFirstLetter;
1392 }
1393 
handleFirstLetter(int & startOffset,int & offsetInNode)1394 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
1395 {
1396     RenderText* renderer = toRenderText(m_node->renderer());
1397     startOffset = (m_node == m_startNode) ? m_startOffset : 0;
1398 
1399     if (!renderer->isTextFragment()) {
1400         offsetInNode = 0;
1401         return renderer;
1402     }
1403 
1404     RenderTextFragment* fragment = toRenderTextFragment(renderer);
1405     int offsetAfterFirstLetter = fragment->start();
1406     if (startOffset >= offsetAfterFirstLetter) {
1407         ASSERT(!m_shouldHandleFirstLetter);
1408         offsetInNode = offsetAfterFirstLetter;
1409         return renderer;
1410     }
1411 
1412     if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) {
1413         m_shouldHandleFirstLetter = true;
1414         offsetInNode = offsetAfterFirstLetter;
1415         return renderer;
1416     }
1417 
1418     m_shouldHandleFirstLetter = false;
1419     offsetInNode = 0;
1420     RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment->firstLetter());
1421 
1422     m_offset = firstLetterRenderer->caretMaxOffset();
1423     m_offset += collapsedSpaceLength(firstLetterRenderer, m_offset);
1424 
1425     return firstLetterRenderer;
1426 }
1427 
handleReplacedElement()1428 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
1429 {
1430     unsigned index = m_node->nodeIndex();
1431     // We want replaced elements to behave like punctuation for boundary
1432     // finding, and to simply take up space for the selection preservation
1433     // code in moveParagraphs, so we use a comma. Unconditionally emit
1434     // here because this iterator is only used for boundary finding.
1435     emitCharacter(',', m_node->parentNode(), index, index + 1);
1436     return true;
1437 }
1438 
handleNonTextNode()1439 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
1440 {
1441     // We can use a linefeed in place of a tab because this simple iterator is only used to
1442     // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
1443     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
1444         unsigned index = m_node->nodeIndex();
1445         // The start of this emitted range is wrong. Ensuring correctness would require
1446         // VisiblePositions and so would be slow. previousBoundary expects this.
1447         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
1448     }
1449     return true;
1450 }
1451 
exitNode()1452 void SimplifiedBackwardsTextIterator::exitNode()
1453 {
1454     if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
1455         // The start of this emitted range is wrong. Ensuring correctness would require
1456         // VisiblePositions and so would be slow. previousBoundary expects this.
1457         emitCharacter('\n', m_node, 0, 0);
1458     }
1459 }
1460 
emitCharacter(UChar c,Node * node,int startOffset,int endOffset)1461 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
1462 {
1463     m_singleCharacterBuffer = c;
1464     m_positionNode = node;
1465     m_positionStartOffset = startOffset;
1466     m_positionEndOffset = endOffset;
1467     m_textOffset = 0;
1468     m_textLength = 1;
1469     m_lastCharacter = c;
1470 }
1471 
advanceRespectingRange(Node * next)1472 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
1473 {
1474     if (!next)
1475         return false;
1476     m_havePassedStartNode |= m_node == m_startNode;
1477     if (m_havePassedStartNode)
1478         return false;
1479     m_node = next;
1480     return true;
1481 }
1482 
range() const1483 PassRefPtrWillBeRawPtr<Range> SimplifiedBackwardsTextIterator::range() const
1484 {
1485     if (m_positionNode)
1486         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1487 
1488     return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
1489 }
1490 
1491 // --------
1492 
CharacterIterator(const Range * range,TextIteratorBehaviorFlags behavior)1493 CharacterIterator::CharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
1494     : m_offset(0)
1495     , m_runOffset(0)
1496     , m_atBreak(true)
1497     , m_textIterator(range, behavior)
1498 {
1499     initialize();
1500 }
1501 
CharacterIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)1502 CharacterIterator::CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
1503     : m_offset(0)
1504     , m_runOffset(0)
1505     , m_atBreak(true)
1506     , m_textIterator(start, end, behavior)
1507 {
1508     initialize();
1509 }
1510 
initialize()1511 void CharacterIterator::initialize()
1512 {
1513     while (!atEnd() && !m_textIterator.length())
1514         m_textIterator.advance();
1515 }
1516 
range() const1517 PassRefPtrWillBeRawPtr<Range> CharacterIterator::range() const
1518 {
1519     RefPtrWillBeRawPtr<Range> r = m_textIterator.range();
1520     if (!m_textIterator.atEnd()) {
1521         if (m_textIterator.length() <= 1) {
1522             ASSERT(!m_runOffset);
1523         } else {
1524             Node* n = r->startContainer();
1525             ASSERT(n == r->endContainer());
1526             int offset = r->startOffset() + m_runOffset;
1527             r->setStart(n, offset, ASSERT_NO_EXCEPTION);
1528             r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION);
1529         }
1530     }
1531     return r.release();
1532 }
1533 
advance(int count)1534 void CharacterIterator::advance(int count)
1535 {
1536     if (count <= 0) {
1537         ASSERT(!count);
1538         return;
1539     }
1540 
1541     m_atBreak = false;
1542 
1543     // easy if there is enough left in the current m_textIterator run
1544     int remaining = m_textIterator.length() - m_runOffset;
1545     if (count < remaining) {
1546         m_runOffset += count;
1547         m_offset += count;
1548         return;
1549     }
1550 
1551     // exhaust the current m_textIterator run
1552     count -= remaining;
1553     m_offset += remaining;
1554 
1555     // move to a subsequent m_textIterator run
1556     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1557         int runLength = m_textIterator.length();
1558         if (!runLength) {
1559             m_atBreak = true;
1560         } else {
1561             // see whether this is m_textIterator to use
1562             if (count < runLength) {
1563                 m_runOffset = count;
1564                 m_offset += count;
1565                 return;
1566             }
1567 
1568             // exhaust this m_textIterator run
1569             count -= runLength;
1570             m_offset += runLength;
1571         }
1572     }
1573 
1574     // ran to the end of the m_textIterator... no more runs left
1575     m_atBreak = true;
1576     m_runOffset = 0;
1577 }
1578 
calculateCharacterSubrange(CharacterIterator & it,int offset,int length,Position & startPosition,Position & endPosition)1579 static void calculateCharacterSubrange(CharacterIterator& it, int offset, int length, Position& startPosition, Position& endPosition)
1580 {
1581     it.advance(offset);
1582     RefPtrWillBeRawPtr<Range> start = it.range();
1583     startPosition = start->startPosition();
1584 
1585     if (length > 1)
1586         it.advance(length - 1);
1587     RefPtrWillBeRawPtr<Range> end = it.range();
1588     endPosition = end->endPosition();
1589 }
1590 
BackwardsCharacterIterator(const Range * range,TextIteratorBehaviorFlags behavior)1591 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
1592     : m_offset(0)
1593     , m_runOffset(0)
1594     , m_atBreak(true)
1595     , m_textIterator(range, behavior)
1596 {
1597     while (!atEnd() && !m_textIterator.length())
1598         m_textIterator.advance();
1599 }
1600 
range() const1601 PassRefPtrWillBeRawPtr<Range> BackwardsCharacterIterator::range() const
1602 {
1603     RefPtrWillBeRawPtr<Range> r = m_textIterator.range();
1604     if (!m_textIterator.atEnd()) {
1605         if (m_textIterator.length() <= 1) {
1606             ASSERT(!m_runOffset);
1607         } else {
1608             Node* n = r->startContainer();
1609             ASSERT(n == r->endContainer());
1610             int offset = r->endOffset() - m_runOffset;
1611             r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION);
1612             r->setEnd(n, offset, ASSERT_NO_EXCEPTION);
1613         }
1614     }
1615     return r.release();
1616 }
1617 
advance(int count)1618 void BackwardsCharacterIterator::advance(int count)
1619 {
1620     if (count <= 0) {
1621         ASSERT(!count);
1622         return;
1623     }
1624 
1625     m_atBreak = false;
1626 
1627     int remaining = m_textIterator.length() - m_runOffset;
1628     if (count < remaining) {
1629         m_runOffset += count;
1630         m_offset += count;
1631         return;
1632     }
1633 
1634     count -= remaining;
1635     m_offset += remaining;
1636 
1637     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1638         int runLength = m_textIterator.length();
1639         if (!runLength) {
1640             m_atBreak = true;
1641         } else {
1642             if (count < runLength) {
1643                 m_runOffset = count;
1644                 m_offset += count;
1645                 return;
1646             }
1647 
1648             count -= runLength;
1649             m_offset += runLength;
1650         }
1651     }
1652 
1653     m_atBreak = true;
1654     m_runOffset = 0;
1655 }
1656 
1657 // --------
1658 
WordAwareIterator(const Range * range)1659 WordAwareIterator::WordAwareIterator(const Range* range)
1660     : m_didLookAhead(true) // So we consider the first chunk from the text iterator.
1661     , m_textIterator(range)
1662 {
1663     advance(); // Get in position over the first chunk of text.
1664 }
1665 
~WordAwareIterator()1666 WordAwareIterator::~WordAwareIterator()
1667 {
1668 }
1669 
1670 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
1671 
advance()1672 void WordAwareIterator::advance()
1673 {
1674     m_buffer.clear();
1675 
1676     // If last time we did a look-ahead, start with that looked-ahead chunk now
1677     if (!m_didLookAhead) {
1678         ASSERT(!m_textIterator.atEnd());
1679         m_textIterator.advance();
1680     }
1681     m_didLookAhead = false;
1682 
1683     // Go to next non-empty chunk.
1684     while (!m_textIterator.atEnd() && !m_textIterator.length())
1685         m_textIterator.advance();
1686 
1687     m_range = m_textIterator.range();
1688 
1689     if (m_textIterator.atEnd())
1690         return;
1691 
1692     while (1) {
1693         // If this chunk ends in whitespace we can just use it as our chunk.
1694         if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1)))
1695             return;
1696 
1697         // If this is the first chunk that failed, save it in m_buffer before look ahead.
1698         if (m_buffer.isEmpty())
1699             m_textIterator.appendTextTo(m_buffer);
1700 
1701         // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
1702         m_textIterator.advance();
1703         if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) {
1704             m_didLookAhead = true;
1705             return;
1706         }
1707 
1708         // Start gobbling chunks until we get to a suitable stopping point
1709         m_textIterator.appendTextTo(m_buffer);
1710         m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION);
1711     }
1712 }
1713 
length() const1714 int WordAwareIterator::length() const
1715 {
1716     if (!m_buffer.isEmpty())
1717         return m_buffer.size();
1718     return m_textIterator.length();
1719 }
1720 
substring(unsigned position,unsigned length) const1721 String WordAwareIterator::substring(unsigned position, unsigned length) const
1722 {
1723     if (!m_buffer.isEmpty())
1724         return String(m_buffer.data() + position, length);
1725     return m_textIterator.substring(position, length);
1726 }
1727 
characterAt(unsigned index) const1728 UChar WordAwareIterator::characterAt(unsigned index) const
1729 {
1730     if (!m_buffer.isEmpty())
1731         return m_buffer[index];
1732     return m_textIterator.characterAt(index);
1733 }
1734 
1735 // --------
1736 
1737 static const size_t minimumSearchBufferSize = 8192;
1738 
1739 #ifndef NDEBUG
1740 static bool searcherInUse;
1741 #endif
1742 
createSearcher()1743 static UStringSearch* createSearcher()
1744 {
1745     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
1746     // but it doesn't matter exactly what it is, since we don't perform any searches
1747     // without setting both the pattern and the text.
1748     UErrorCode status = U_ZERO_ERROR;
1749     String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
1750     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
1751     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1752     return searcher;
1753 }
1754 
searcher()1755 static UStringSearch* searcher()
1756 {
1757     static UStringSearch* searcher = createSearcher();
1758     return searcher;
1759 }
1760 
lockSearcher()1761 static inline void lockSearcher()
1762 {
1763 #ifndef NDEBUG
1764     ASSERT(!searcherInUse);
1765     searcherInUse = true;
1766 #endif
1767 }
1768 
unlockSearcher()1769 static inline void unlockSearcher()
1770 {
1771 #ifndef NDEBUG
1772     ASSERT(searcherInUse);
1773     searcherInUse = false;
1774 #endif
1775 }
1776 
SearchBuffer(const String & target,FindOptions options)1777 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
1778     : m_options(options)
1779     , m_prefixLength(0)
1780     , m_numberOfCharactersJustAppended(0)
1781     , m_atBreak(true)
1782     , m_needsMoreContext(options & AtWordStarts)
1783     , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
1784 {
1785     ASSERT(!target.isEmpty());
1786     target.appendTo(m_target);
1787 
1788     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
1789     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
1790     // to add tailoring on top of the locale-specific tailoring as of this writing.
1791     foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());
1792 
1793     size_t targetLength = m_target.size();
1794     m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
1795     m_overlap = m_buffer.capacity() / 4;
1796 
1797     if ((m_options & AtWordStarts) && targetLength) {
1798         UChar32 targetFirstCharacter;
1799         U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
1800         // Characters in the separator category never really occur at the beginning of a word,
1801         // so if the target begins with such a character, we just ignore the AtWordStart option.
1802         if (isSeparator(targetFirstCharacter)) {
1803             m_options &= ~AtWordStarts;
1804             m_needsMoreContext = false;
1805         }
1806     }
1807 
1808     // Grab the single global searcher.
1809     // If we ever have a reason to do more than once search buffer at once, we'll have
1810     // to move to multiple searchers.
1811     lockSearcher();
1812 
1813     UStringSearch* searcher = WebCore::searcher();
1814     UCollator* collator = usearch_getCollator(searcher);
1815 
1816     UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
1817     if (ucol_getStrength(collator) != strength) {
1818         ucol_setStrength(collator, strength);
1819         usearch_reset(searcher);
1820     }
1821 
1822     UErrorCode status = U_ZERO_ERROR;
1823     usearch_setPattern(searcher, m_target.data(), targetLength, &status);
1824     ASSERT(status == U_ZERO_ERROR);
1825 
1826     // The kana workaround requires a normalized copy of the target string.
1827     if (m_targetRequiresKanaWorkaround)
1828         normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
1829 }
1830 
~SearchBuffer()1831 inline SearchBuffer::~SearchBuffer()
1832 {
1833     // Leave the static object pointing to a valid string.
1834     UErrorCode status = U_ZERO_ERROR;
1835     usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status);
1836     ASSERT(status == U_ZERO_ERROR);
1837 
1838     unlockSearcher();
1839 }
1840 
1841 template<typename CharType>
append(const CharType * characters,size_t length)1842 inline void SearchBuffer::append(const CharType* characters, size_t length)
1843 {
1844     ASSERT(length);
1845 
1846     if (m_atBreak) {
1847         m_buffer.shrink(0);
1848         m_prefixLength = 0;
1849         m_atBreak = false;
1850     } else if (m_buffer.size() == m_buffer.capacity()) {
1851         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
1852         m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
1853         m_buffer.shrink(m_overlap);
1854     }
1855 
1856     size_t oldLength = m_buffer.size();
1857     size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
1858     ASSERT(usableLength);
1859     m_buffer.resize(oldLength + usableLength);
1860     UChar* destination = m_buffer.data() + oldLength;
1861     StringImpl::copyChars(destination, characters, usableLength);
1862     foldQuoteMarksAndSoftHyphens(destination, usableLength);
1863     m_numberOfCharactersJustAppended = usableLength;
1864 }
1865 
needsMoreContext() const1866 inline bool SearchBuffer::needsMoreContext() const
1867 {
1868     return m_needsMoreContext;
1869 }
1870 
prependContext(const UChar * characters,size_t length)1871 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
1872 {
1873     ASSERT(m_needsMoreContext);
1874     ASSERT(m_prefixLength == m_buffer.size());
1875 
1876     if (!length)
1877         return;
1878 
1879     m_atBreak = false;
1880 
1881     size_t wordBoundaryContextStart = length;
1882     if (wordBoundaryContextStart) {
1883         U16_BACK_1(characters, 0, wordBoundaryContextStart);
1884         wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
1885     }
1886 
1887     size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
1888     m_buffer.prepend(characters + length - usableLength, usableLength);
1889     m_prefixLength += usableLength;
1890 
1891     if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
1892         m_needsMoreContext = false;
1893 }
1894 
atBreak() const1895 inline bool SearchBuffer::atBreak() const
1896 {
1897     return m_atBreak;
1898 }
1899 
reachedBreak()1900 inline void SearchBuffer::reachedBreak()
1901 {
1902     m_atBreak = true;
1903 }
1904 
isBadMatch(const UChar * match,size_t matchLength) const1905 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
1906 {
1907     // This function implements the kana workaround. If usearch treats
1908     // it as a match, but we do not want to, then it's a "bad match".
1909     if (!m_targetRequiresKanaWorkaround)
1910         return false;
1911 
1912     // Normalize into a match buffer. We reuse a single buffer rather than
1913     // creating a new one each time.
1914     normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);
1915 
1916     return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size());
1917 }
1918 
isWordStartMatch(size_t start,size_t length) const1919 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
1920 {
1921     ASSERT(m_options & AtWordStarts);
1922 
1923     if (!start)
1924         return true;
1925 
1926     int size = m_buffer.size();
1927     int offset = start;
1928     UChar32 firstCharacter;
1929     U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
1930 
1931     if (m_options & TreatMedialCapitalAsWordStart) {
1932         UChar32 previousCharacter;
1933         U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
1934 
1935         if (isSeparator(firstCharacter)) {
1936             // The start of a separator run is a word start (".org" in "webkit.org").
1937             if (!isSeparator(previousCharacter))
1938                 return true;
1939         } else if (isASCIIUpper(firstCharacter)) {
1940             // The start of an uppercase run is a word start ("Kit" in "WebKit").
1941             if (!isASCIIUpper(previousCharacter))
1942                 return true;
1943             // The last character of an uppercase run followed by a non-separator, non-digit
1944             // is a word start ("Request" in "XMLHTTPRequest").
1945             offset = start;
1946             U16_FWD_1(m_buffer.data(), offset, size);
1947             UChar32 nextCharacter = 0;
1948             if (offset < size)
1949                 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
1950             if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
1951                 return true;
1952         } else if (isASCIIDigit(firstCharacter)) {
1953             // The start of a digit run is a word start ("2" in "WebKit2").
1954             if (!isASCIIDigit(previousCharacter))
1955                 return true;
1956         } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
1957             // The start of a non-separator, non-uppercase, non-digit run is a word start,
1958             // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
1959             return true;
1960         }
1961     }
1962 
1963     // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
1964     // a word, so treat the position before any CJK character as a word start.
1965     if (Character::isCJKIdeographOrSymbol(firstCharacter))
1966         return true;
1967 
1968     size_t wordBreakSearchStart = start + length;
1969     while (wordBreakSearchStart > start)
1970         wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
1971     return wordBreakSearchStart == start;
1972 }
1973 
search(size_t & start)1974 inline size_t SearchBuffer::search(size_t& start)
1975 {
1976     size_t size = m_buffer.size();
1977     if (m_atBreak) {
1978         if (!size)
1979             return 0;
1980     } else {
1981         if (size != m_buffer.capacity())
1982             return 0;
1983     }
1984 
1985     UStringSearch* searcher = WebCore::searcher();
1986 
1987     UErrorCode status = U_ZERO_ERROR;
1988     usearch_setText(searcher, m_buffer.data(), size, &status);
1989     ASSERT(status == U_ZERO_ERROR);
1990 
1991     usearch_setOffset(searcher, m_prefixLength, &status);
1992     ASSERT(status == U_ZERO_ERROR);
1993 
1994     int matchStart = usearch_next(searcher, &status);
1995     ASSERT(status == U_ZERO_ERROR);
1996 
1997 nextMatch:
1998     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
1999         ASSERT(matchStart == USEARCH_DONE);
2000         return 0;
2001     }
2002 
2003     // Matches that start in the overlap area are only tentative.
2004     // The same match may appear later, matching more characters,
2005     // possibly including a combining character that's not yet in the buffer.
2006     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
2007         size_t overlap = m_overlap;
2008         if (m_options & AtWordStarts) {
2009             // Ensure that there is sufficient context before matchStart the next time around for
2010             // determining if it is at a word boundary.
2011             int wordBoundaryContextStart = matchStart;
2012             U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
2013             wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
2014             overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
2015         }
2016         memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
2017         m_prefixLength -= std::min(m_prefixLength, size - overlap);
2018         m_buffer.shrink(overlap);
2019         return 0;
2020     }
2021 
2022     size_t matchedLength = usearch_getMatchedLength(searcher);
2023     ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
2024 
2025     // If this match is "bad", move on to the next match.
2026     if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
2027         matchStart = usearch_next(searcher, &status);
2028         ASSERT(status == U_ZERO_ERROR);
2029         goto nextMatch;
2030     }
2031 
2032     size_t newSize = size - (matchStart + 1);
2033     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
2034     m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
2035     m_buffer.shrink(newSize);
2036 
2037     start = size - matchStart;
2038     return matchedLength;
2039 }
2040 
2041 // --------
2042 
rangeLength(const Range * r,bool forSelectionPreservation)2043 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
2044 {
2045     int length = 0;
2046     TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter;
2047     if (forSelectionPreservation)
2048         behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions;
2049     for (TextIterator it(r, behaviorFlags); !it.atEnd(); it.advance())
2050         length += it.length();
2051 
2052     return length;
2053 }
2054 
subrange(Range * entireRange,int characterOffset,int characterCount)2055 PassRefPtrWillBeRawPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
2056 {
2057     CharacterIterator entireRangeIterator(entireRange);
2058     Position start;
2059     Position end;
2060     calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end);
2061     return Range::create(entireRange->ownerDocument(), start, end);
2062 }
2063 
2064 // --------
2065 
plainText(const Range * r,TextIteratorBehaviorFlags behavior)2066 String plainText(const Range* r, TextIteratorBehaviorFlags behavior)
2067 {
2068     // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
2069     static const unsigned initialCapacity = 1 << 15;
2070 
2071     unsigned bufferLength = 0;
2072     StringBuilder builder;
2073     builder.reserveCapacity(initialCapacity);
2074 
2075     for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) {
2076         it.appendTextToStringBuilder(builder);
2077         bufferLength += it.length();
2078     }
2079 
2080     if (!bufferLength)
2081         return emptyString();
2082 
2083     return builder.toString();
2084 }
2085 
collapsedToBoundary(const Range * range,bool forward)2086 static PassRefPtrWillBeRawPtr<Range> collapsedToBoundary(const Range* range, bool forward)
2087 {
2088     RefPtrWillBeRawPtr<Range> result = range->cloneRange();
2089     result->collapse(!forward);
2090     return result.release();
2091 }
2092 
2093 // Check if there's any unpaird surrogate code point.
2094 // Non-character code points are not checked.
isValidUTF16(const String & s)2095 static bool isValidUTF16(const String& s)
2096 {
2097     if (s.is8Bit())
2098         return true;
2099     const UChar* ustr = s.characters16();
2100     size_t length = s.length();
2101     size_t position = 0;
2102     while (position < length) {
2103         UChar32 character;
2104         U16_NEXT(ustr, position, length, character);
2105         if (U_IS_SURROGATE(character))
2106             return false;
2107     }
2108     return true;
2109 }
2110 
findPlainTextInternal(CharacterIterator & it,const String & target,FindOptions options,size_t & matchStart)2111 static size_t findPlainTextInternal(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
2112 {
2113     matchStart = 0;
2114     size_t matchLength = 0;
2115 
2116     if (!isValidUTF16(target))
2117         return 0;
2118 
2119     SearchBuffer buffer(target, options);
2120 
2121     if (buffer.needsMoreContext()) {
2122         RefPtrWillBeRawPtr<Range> startRange = it.range();
2123         RefPtrWillBeRawPtr<Range> beforeStartRange = startRange->ownerDocument().createRange();
2124         beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION);
2125         for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
2126             Vector<UChar, 1024> characters;
2127             backwardsIterator.prependTextTo(characters);
2128             buffer.prependContext(characters.data(), characters.size());
2129             if (!buffer.needsMoreContext())
2130                 break;
2131         }
2132     }
2133 
2134     while (!it.atEnd()) {
2135         it.appendTextTo(buffer);
2136         it.advance(buffer.numberOfCharactersJustAppended());
2137 tryAgain:
2138         size_t matchStartOffset;
2139         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
2140             // Note that we found a match, and where we found it.
2141             size_t lastCharacterInBufferOffset = it.characterOffset();
2142             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
2143             matchStart = lastCharacterInBufferOffset - matchStartOffset;
2144             matchLength = newMatchLength;
2145             // If searching forward, stop on the first match.
2146             // If searching backward, don't stop, so we end up with the last match.
2147             if (!(options & Backwards))
2148                 break;
2149             goto tryAgain;
2150         }
2151         if (it.atBreak() && !buffer.atBreak()) {
2152             buffer.reachedBreak();
2153             goto tryAgain;
2154         }
2155     }
2156 
2157     return matchLength;
2158 }
2159 
2160 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots;
2161 
findPlainText(const Range * range,const String & target,FindOptions options)2162 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
2163 {
2164     // CharacterIterator requires renderers to be up-to-date
2165     range->ownerDocument().updateLayout();
2166 
2167     // First, find the text.
2168     size_t matchStart;
2169     size_t matchLength;
2170     {
2171         CharacterIterator findIterator(range, iteratorFlagsForFindPlainText);
2172         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
2173         if (!matchLength)
2174             return collapsedToBoundary(range, !(options & Backwards));
2175     }
2176 
2177     // Then, find the document position of the start and the end of the text.
2178     CharacterIterator computeRangeIterator(range, iteratorFlagsForFindPlainText);
2179     Position resultStart;
2180     Position resultEnd;
2181     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
2182     return Range::create(range->ownerDocument(), resultStart, resultEnd);
2183 }
2184 
findPlainText(const Position & inputStart,const Position & inputEnd,const String & target,FindOptions options,Position & resultStart,Position & resultEnd)2185 void findPlainText(const Position& inputStart, const Position& inputEnd, const String& target, FindOptions options, Position& resultStart, Position& resultEnd)
2186 {
2187     resultStart.clear();
2188     resultEnd.clear();
2189     // CharacterIterator requires renderers to be up-to-date.
2190     if (!inputStart.inDocument())
2191         return;
2192     ASSERT(inputStart.document() == inputEnd.document());
2193     inputStart.document()->updateLayout();
2194 
2195     // FIXME: Reduce the code duplication with above (but how?).
2196     size_t matchStart;
2197     size_t matchLength;
2198     {
2199         CharacterIterator findIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
2200         matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
2201         if (!matchLength) {
2202             const Position& collapseTo = options & Backwards ? inputStart : inputEnd;
2203             resultStart = collapseTo;
2204             resultEnd = collapseTo;
2205             return;
2206         }
2207     }
2208 
2209     CharacterIterator computeRangeIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
2210     calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
2211 }
2212 
2213 }
2214