• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
3  * Copyright (C) 2005 Alexey Proskuryakov.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "TextIterator.h"
29 
30 #include "CharacterNames.h"
31 #include "Document.h"
32 #include "HTMLElement.h"
33 #include "HTMLNames.h"
34 #include "htmlediting.h"
35 #include "InlineTextBox.h"
36 #include "Position.h"
37 #include "Range.h"
38 #include "RenderTableCell.h"
39 #include "RenderTableRow.h"
40 #include "RenderTextControl.h"
41 #include "VisiblePosition.h"
42 #include "visible_units.h"
43 
44 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
45 #include "TextBreakIteratorInternalICU.h"
46 #include <unicode/usearch.h>
47 #endif
48 
49 using namespace WTF::Unicode;
50 using namespace std;
51 
52 namespace WebCore {
53 
54 using namespace HTMLNames;
55 
56 // Buffer that knows how to compare with a search target.
57 // Keeps enough of the previous text to be able to search in the future, but no more.
58 // Non-breaking spaces are always equal to normal spaces.
59 // Case folding is also done if <isCaseSensitive> is false.
60 class SearchBuffer : public Noncopyable {
61 public:
62     SearchBuffer(const String& target, bool isCaseSensitive);
63     ~SearchBuffer();
64 
65     // Returns number of characters appended; guaranteed to be in the range [1, length].
66     size_t append(const UChar*, size_t length);
67     void reachedBreak();
68 
69     // Result is the size in characters of what was found.
70     // And <startOffset> is the number of characters back to the start of what was found.
71     size_t search(size_t& startOffset);
72     bool atBreak() const;
73 
74 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
75 
76 private:
77     String m_target;
78     Vector<UChar> m_buffer;
79     size_t m_overlap;
80     bool m_atBreak;
81 
82 #else
83 
84 private:
85     void append(UChar, bool isCharacterStart);
86     size_t length() const;
87 
88     String m_target;
89     bool m_isCaseSensitive;
90 
91     Vector<UChar> m_buffer;
92     Vector<bool> m_isCharacterStartBuffer;
93     bool m_isBufferFull;
94     size_t m_cursor;
95 
96 #endif
97 };
98 
99 // --------
100 
101 static const unsigned bitsInWord = sizeof(unsigned) * 8;
102 static const unsigned bitInWordMask = bitsInWord - 1;
103 
BitStack()104 BitStack::BitStack()
105     : m_size(0)
106 {
107 }
108 
push(bool bit)109 void BitStack::push(bool bit)
110 {
111     unsigned index = m_size / bitsInWord;
112     unsigned shift = m_size & bitInWordMask;
113     if (!shift && index == m_words.size()) {
114         m_words.grow(index + 1);
115         m_words[index] = 0;
116     }
117     unsigned& word = m_words[index];
118     unsigned mask = 1U << shift;
119     if (bit)
120         word |= mask;
121     else
122         word &= ~mask;
123     ++m_size;
124 }
125 
pop()126 void BitStack::pop()
127 {
128     if (m_size)
129         --m_size;
130 }
131 
top() const132 bool BitStack::top() const
133 {
134     if (!m_size)
135         return false;
136     unsigned shift = (m_size - 1) & bitInWordMask;
137     return m_words.last() & (1U << shift);
138 }
139 
size() const140 unsigned BitStack::size() const
141 {
142     return m_size;
143 }
144 
145 // --------
146 
parentCrossingShadowBoundaries(Node * node)147 static inline Node* parentCrossingShadowBoundaries(Node* node)
148 {
149     if (Node* parent = node->parentNode())
150         return parent;
151     return node->shadowParentNode();
152 }
153 
154 #ifndef NDEBUG
155 
depthCrossingShadowBoundaries(Node * node)156 static unsigned depthCrossingShadowBoundaries(Node* node)
157 {
158     unsigned depth = 0;
159     for (Node* parent = parentCrossingShadowBoundaries(node); parent; parent = parentCrossingShadowBoundaries(parent))
160         ++depth;
161     return depth;
162 }
163 
164 #endif
165 
166 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
nextInPreOrderCrossingShadowBoundaries(Node * rangeEndContainer,int rangeEndOffset)167 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
168 {
169     if (!rangeEndContainer)
170         return 0;
171     if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
172         if (Node* next = rangeEndContainer->childNode(rangeEndOffset))
173             return next;
174     }
175     for (Node* node = rangeEndContainer; node; node = parentCrossingShadowBoundaries(node)) {
176         if (Node* next = node->nextSibling())
177             return next;
178     }
179     return 0;
180 }
181 
previousInPostOrderCrossingShadowBoundaries(Node * rangeStartContainer,int rangeStartOffset)182 static Node* previousInPostOrderCrossingShadowBoundaries(Node* rangeStartContainer, int rangeStartOffset)
183 {
184     if (!rangeStartContainer)
185         return 0;
186     if (rangeStartOffset > 0 && !rangeStartContainer->offsetInCharacters()) {
187         if (Node* previous = rangeStartContainer->childNode(rangeStartOffset - 1))
188             return previous;
189     }
190     for (Node* node = rangeStartContainer; node; node = parentCrossingShadowBoundaries(node)) {
191         if (Node* previous = node->previousSibling())
192             return previous;
193     }
194     return 0;
195 }
196 
197 // --------
198 
fullyClipsContents(Node * node)199 static inline bool fullyClipsContents(Node* node)
200 {
201     RenderObject* renderer = node->renderer();
202     if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
203         return false;
204     return toRenderBox(renderer)->size().isEmpty();
205 }
206 
ignoresContainerClip(Node * node)207 static inline bool ignoresContainerClip(Node* node)
208 {
209     RenderObject* renderer = node->renderer();
210     if (!renderer || renderer->isText())
211         return false;
212     EPosition position = renderer->style()->position();
213     return position == AbsolutePosition || position == FixedPosition;
214 }
215 
pushFullyClippedState(BitStack & stack,Node * node)216 static void pushFullyClippedState(BitStack& stack, Node* node)
217 {
218     ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
219 
220     // Push true if this node full clips its contents, or if a parent already has fully
221     // clipped and this is not a node that ignores its container's clip.
222     stack.push(fullyClipsContents(node) || stack.top() && !ignoresContainerClip(node));
223 }
224 
setUpFullyClippedStack(BitStack & stack,Node * node)225 static void setUpFullyClippedStack(BitStack& stack, Node* node)
226 {
227     // Put the nodes in a vector so we can iterate in reverse order.
228     Vector<Node*, 100> ancestry;
229     for (Node* parent = parentCrossingShadowBoundaries(node); parent; parent = parentCrossingShadowBoundaries(parent))
230         ancestry.append(parent);
231 
232     // Call pushFullyClippedState on each node starting with the earliest ancestor.
233     size_t size = ancestry.size();
234     for (size_t i = 0; i < size; ++i)
235         pushFullyClippedState(stack, ancestry[size - i - 1]);
236     pushFullyClippedState(stack, node);
237 
238     ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
239 }
240 
241 // --------
242 
TextIterator()243 TextIterator::TextIterator()
244     : m_startContainer(0)
245     , m_startOffset(0)
246     , m_endContainer(0)
247     , m_endOffset(0)
248     , m_positionNode(0)
249     , m_textCharacters(0)
250     , m_textLength(0)
251     , m_lastCharacter(0)
252     , m_emitCharactersBetweenAllVisiblePositions(false)
253     , m_enterTextControls(false)
254 {
255 }
256 
TextIterator(const Range * r,bool emitCharactersBetweenAllVisiblePositions,bool enterTextControls)257 TextIterator::TextIterator(const Range* r, bool emitCharactersBetweenAllVisiblePositions, bool enterTextControls)
258     : m_startContainer(0)
259     , m_startOffset(0)
260     , m_endContainer(0)
261     , m_endOffset(0)
262     , m_positionNode(0)
263     , m_textCharacters(0)
264     , m_textLength(0)
265     , m_emitCharactersBetweenAllVisiblePositions(emitCharactersBetweenAllVisiblePositions)
266     , m_enterTextControls(enterTextControls)
267 {
268     if (!r)
269         return;
270 
271     // get and validate the range endpoints
272     Node* startContainer = r->startContainer();
273     if (!startContainer)
274         return;
275     int startOffset = r->startOffset();
276     Node* endContainer = r->endContainer();
277     int endOffset = r->endOffset();
278 
279     // Callers should be handing us well-formed ranges. If we discover that this isn't
280     // the case, we could consider changing this assertion to an early return.
281     ASSERT(r->boundaryPointsValid());
282 
283     // remember range - this does not change
284     m_startContainer = startContainer;
285     m_startOffset = startOffset;
286     m_endContainer = endContainer;
287     m_endOffset = endOffset;
288 
289     // set up the current node for processing
290     m_node = r->firstNode();
291     if (!m_node)
292         return;
293     setUpFullyClippedStack(m_fullyClippedStack, m_node);
294     m_offset = m_node == m_startContainer ? m_startOffset : 0;
295     m_handledNode = false;
296     m_handledChildren = false;
297 
298     // calculate first out of bounds node
299     m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
300 
301     // initialize node processing state
302     m_needAnotherNewline = false;
303     m_textBox = 0;
304 
305     // initialize record of previous node processing
306     m_haveEmitted = false;
307     m_lastTextNode = 0;
308     m_lastTextNodeEndedWithCollapsedSpace = false;
309     m_lastCharacter = 0;
310 
311 #ifndef NDEBUG
312     // need this just because of the assert in advance()
313     m_positionNode = m_node;
314 #endif
315 
316     // identify the first run
317     advance();
318 }
319 
advance()320 void TextIterator::advance()
321 {
322     // reset the run information
323     m_positionNode = 0;
324     m_textLength = 0;
325 
326     // handle remembered node that needed a newline after the text node's newline
327     if (m_needAnotherNewline) {
328         // Emit the extra newline, and position it *inside* m_node, after m_node's
329         // contents, in case it's a block, in the same way that we position the first
330         // newline.  The range for the emitted newline should start where the line
331         // break begins.
332         // FIXME: It would be cleaner if we emitted two newlines during the last
333         // iteration, instead of using m_needAnotherNewline.
334         Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
335         emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
336         m_needAnotherNewline = false;
337         return;
338     }
339 
340     // handle remembered text box
341     if (m_textBox) {
342         handleTextBox();
343         if (m_positionNode)
344             return;
345     }
346 
347     while (m_node && m_node != m_pastEndNode) {
348         // if the range ends at offset 0 of an element, represent the
349         // position, but not the content, of that element e.g. if the
350         // node is a blockflow element, emit a newline that
351         // precedes the element
352         if (m_node == m_endContainer && m_endOffset == 0) {
353             representNodeOffsetZero();
354             m_node = 0;
355             return;
356         }
357 
358         RenderObject* renderer = m_node->renderer();
359         if (!renderer) {
360             m_handledNode = true;
361             m_handledChildren = true;
362         } else {
363             // handle current node according to its type
364             if (!m_handledNode) {
365                 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE?
366                     m_handledNode = handleTextNode();
367                 else if (renderer && (renderer->isImage() || renderer->isWidget() ||
368                          (renderer->node() && renderer->node()->isElementNode() &&
369                           static_cast<Element*>(renderer->node())->isFormControlElement())))
370                     m_handledNode = handleReplacedElement();
371                 else
372                     m_handledNode = handleNonTextNode();
373                 if (m_positionNode)
374                     return;
375             }
376         }
377 
378         // find a new current node to handle in depth-first manner,
379         // calling exitNode() as we come back thru a parent node
380         Node* next = m_handledChildren ? 0 : m_node->firstChild();
381         m_offset = 0;
382         if (!next) {
383             next = m_node->nextSibling();
384             if (!next) {
385                 bool pastEnd = m_node->traverseNextNode() == m_pastEndNode;
386                 Node* parentNode = parentCrossingShadowBoundaries(m_node);
387                 while (!next && parentNode) {
388                     if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
389                         return;
390                     bool haveRenderer = m_node->renderer();
391                     m_node = parentNode;
392                     m_fullyClippedStack.pop();
393                     parentNode = parentCrossingShadowBoundaries(m_node);
394                     if (haveRenderer)
395                         exitNode();
396                     if (m_positionNode) {
397                         m_handledNode = true;
398                         m_handledChildren = true;
399                         return;
400                     }
401                     next = m_node->nextSibling();
402                 }
403             }
404             m_fullyClippedStack.pop();
405         }
406 
407         // set the new current node
408         m_node = next;
409         if (m_node)
410             pushFullyClippedState(m_fullyClippedStack, m_node);
411         m_handledNode = false;
412         m_handledChildren = false;
413 
414         // how would this ever be?
415         if (m_positionNode)
416             return;
417     }
418 }
419 
compareBoxStart(const InlineTextBox * first,const InlineTextBox * second)420 static inline bool compareBoxStart(const InlineTextBox* first, const InlineTextBox* second)
421 {
422     return first->start() < second->start();
423 }
424 
handleTextNode()425 bool TextIterator::handleTextNode()
426 {
427     if (m_fullyClippedStack.top())
428         return false;
429 
430     RenderText* renderer = toRenderText(m_node->renderer());
431     if (renderer->style()->visibility() != VISIBLE)
432         return false;
433 
434     m_lastTextNode = m_node;
435     String str = renderer->text();
436 
437     // handle pre-formatted text
438     if (!renderer->style()->collapseWhiteSpace()) {
439         int runStart = m_offset;
440         if (m_lastTextNodeEndedWithCollapsedSpace) {
441             emitCharacter(' ', m_node, 0, runStart, runStart);
442             return false;
443         }
444         int strLength = str.length();
445         int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
446         int runEnd = min(strLength, end);
447 
448         if (runStart >= runEnd)
449             return true;
450 
451         emitText(m_node, runStart, runEnd);
452         return true;
453     }
454 
455     if (!renderer->firstTextBox() && str.length() > 0) {
456         m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
457         return true;
458     }
459 
460     // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
461     if (renderer->containsReversedText()) {
462         m_sortedTextBoxes.clear();
463         for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
464             m_sortedTextBoxes.append(textBox);
465         }
466         std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), compareBoxStart);
467         m_sortedTextBoxesPosition = 0;
468     }
469 
470     m_textBox = renderer->containsReversedText() ? m_sortedTextBoxes[0] : renderer->firstTextBox();
471     handleTextBox();
472     return true;
473 }
474 
handleTextBox()475 void TextIterator::handleTextBox()
476 {
477     RenderText* renderer = toRenderText(m_node->renderer());
478     String str = renderer->text();
479     int start = m_offset;
480     int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
481     while (m_textBox) {
482         int textBoxStart = m_textBox->start();
483         int runStart = max(textBoxStart, start);
484 
485         // Check for collapsed space at the start of this run.
486         InlineTextBox* firstTextBox = renderer->containsReversedText() ? m_sortedTextBoxes[0] : renderer->firstTextBox();
487         bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
488             || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
489         if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
490             if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
491                 unsigned spaceRunStart = runStart - 1;
492                 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
493                     --spaceRunStart;
494                 emitText(m_node, spaceRunStart, spaceRunStart + 1);
495             } else
496                 emitCharacter(' ', m_node, 0, runStart, runStart);
497             return;
498         }
499         int textBoxEnd = textBoxStart + m_textBox->len();
500         int runEnd = min(textBoxEnd, end);
501 
502         // Determine what the next text box will be, but don't advance yet
503         InlineTextBox* nextTextBox = 0;
504         if (renderer->containsReversedText()) {
505             if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
506                 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
507         } else
508             nextTextBox = m_textBox->nextTextBox();
509 
510         if (runStart < runEnd) {
511             // Handle either a single newline character (which becomes a space),
512             // or a run of characters that does not include a newline.
513             // This effectively translates newlines to spaces without copying the text.
514             if (str[runStart] == '\n') {
515                 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
516                 m_offset = runStart + 1;
517             } else {
518                 int subrunEnd = str.find('\n', runStart);
519                 if (subrunEnd == -1 || subrunEnd > runEnd)
520                     subrunEnd = runEnd;
521 
522                 m_offset = subrunEnd;
523                 emitText(m_node, runStart, subrunEnd);
524             }
525 
526             // If we are doing a subrun that doesn't go to the end of the text box,
527             // come back again to finish handling this text box; don't advance to the next one.
528             if (m_positionEndOffset < textBoxEnd)
529                 return;
530 
531             // Advance and return
532             int nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
533             if (nextRunStart > runEnd)
534                 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
535             m_textBox = nextTextBox;
536             if (renderer->containsReversedText())
537                 ++m_sortedTextBoxesPosition;
538             return;
539         }
540         // Advance and continue
541         m_textBox = nextTextBox;
542         if (renderer->containsReversedText())
543             ++m_sortedTextBoxesPosition;
544     }
545 }
546 
handleReplacedElement()547 bool TextIterator::handleReplacedElement()
548 {
549     if (m_fullyClippedStack.top())
550         return false;
551 
552     RenderObject* renderer = m_node->renderer();
553     if (renderer->style()->visibility() != VISIBLE)
554         return false;
555 
556     if (m_lastTextNodeEndedWithCollapsedSpace) {
557         emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
558         return false;
559     }
560 
561     if (m_enterTextControls && renderer->isTextControl()) {
562         if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->innerTextElement()) {
563             m_node = innerTextElement->shadowTreeRootNode();
564             pushFullyClippedState(m_fullyClippedStack, m_node);
565             m_offset = 0;
566             return false;
567         }
568     }
569 
570     m_haveEmitted = true;
571 
572     if (m_emitCharactersBetweenAllVisiblePositions) {
573         // We want replaced elements to behave like punctuation for boundary
574         // finding, and to simply take up space for the selection preservation
575         // code in moveParagraphs, so we use a comma.
576         emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
577         return true;
578     }
579 
580     m_positionNode = m_node->parentNode();
581     m_positionOffsetBaseNode = m_node;
582     m_positionStartOffset = 0;
583     m_positionEndOffset = 1;
584 
585     m_textCharacters = 0;
586     m_textLength = 0;
587 
588     m_lastCharacter = 0;
589 
590     return true;
591 }
592 
shouldEmitTabBeforeNode(Node * node)593 static bool shouldEmitTabBeforeNode(Node* node)
594 {
595     RenderObject* r = node->renderer();
596 
597     // Table cells are delimited by tabs.
598     if (!r || !isTableCell(node))
599         return false;
600 
601     // Want a tab before every cell other than the first one
602     RenderTableCell* rc = toRenderTableCell(r);
603     RenderTable* t = rc->table();
604     return t && (t->cellBefore(rc) || t->cellAbove(rc));
605 }
606 
shouldEmitNewlineForNode(Node * node)607 static bool shouldEmitNewlineForNode(Node* node)
608 {
609     // br elements are represented by a single newline.
610     RenderObject* r = node->renderer();
611     if (!r)
612         return node->hasTagName(brTag);
613 
614     return r->isBR();
615 }
616 
shouldEmitNewlinesBeforeAndAfterNode(Node * node)617 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node)
618 {
619     // Block flow (versus inline flow) is represented by having
620     // a newline both before and after the element.
621     RenderObject* r = node->renderer();
622     if (!r) {
623         return (node->hasTagName(blockquoteTag)
624                 || node->hasTagName(ddTag)
625                 || node->hasTagName(divTag)
626                 || node->hasTagName(dlTag)
627                 || node->hasTagName(dtTag)
628                 || node->hasTagName(h1Tag)
629                 || node->hasTagName(h2Tag)
630                 || node->hasTagName(h3Tag)
631                 || node->hasTagName(h4Tag)
632                 || node->hasTagName(h5Tag)
633                 || node->hasTagName(h6Tag)
634                 || node->hasTagName(hrTag)
635                 || node->hasTagName(liTag)
636                 || node->hasTagName(listingTag)
637                 || node->hasTagName(olTag)
638                 || node->hasTagName(pTag)
639                 || node->hasTagName(preTag)
640                 || node->hasTagName(trTag)
641                 || node->hasTagName(ulTag));
642     }
643 
644     // Need to make an exception for table cells, because they are blocks, but we
645     // want them tab-delimited rather than having newlines before and after.
646     if (isTableCell(node))
647         return false;
648 
649     // Need to make an exception for table row elements, because they are neither
650     // "inline" or "RenderBlock", but we want newlines for them.
651     if (r->isTableRow()) {
652         RenderTable* t = toRenderTableRow(r)->table();
653         if (t && !t->isInline())
654             return true;
655     }
656 
657     return !r->isInline() && r->isRenderBlock() && !r->isFloatingOrPositioned() && !r->isBody();
658 }
659 
shouldEmitNewlineAfterNode(Node * node)660 static bool shouldEmitNewlineAfterNode(Node* node)
661 {
662     // FIXME: It should be better but slower to create a VisiblePosition here.
663     if (!shouldEmitNewlinesBeforeAndAfterNode(node))
664         return false;
665     // Check if this is the very last renderer in the document.
666     // If so, then we should not emit a newline.
667     while ((node = node->traverseNextSibling()))
668         if (node->renderer())
669             return true;
670     return false;
671 }
672 
shouldEmitNewlineBeforeNode(Node * node)673 static bool shouldEmitNewlineBeforeNode(Node* node)
674 {
675     return shouldEmitNewlinesBeforeAndAfterNode(node);
676 }
677 
shouldEmitExtraNewlineForNode(Node * node)678 static bool shouldEmitExtraNewlineForNode(Node* node)
679 {
680     // When there is a significant collapsed bottom margin, emit an extra
681     // newline for a more realistic result.  We end up getting the right
682     // result even without margin collapsing. For example: <div><p>text</p></div>
683     // will work right even if both the <div> and the <p> have bottom margins.
684     RenderObject* r = node->renderer();
685     if (!r || !r->isBox())
686         return false;
687 
688     // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
689     // not to do this at all
690     if (node->hasTagName(h1Tag)
691         || node->hasTagName(h2Tag)
692         || node->hasTagName(h3Tag)
693         || node->hasTagName(h4Tag)
694         || node->hasTagName(h5Tag)
695         || node->hasTagName(h6Tag)
696         || node->hasTagName(pTag)) {
697         RenderStyle* style = r->style();
698         if (style) {
699             int bottomMargin = toRenderBox(r)->collapsedMarginBottom();
700             int fontSize = style->fontDescription().computedPixelSize();
701             if (bottomMargin * 2 >= fontSize)
702                 return true;
703         }
704     }
705 
706     return false;
707 }
708 
709 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
shouldRepresentNodeOffsetZero()710 bool TextIterator::shouldRepresentNodeOffsetZero()
711 {
712     if (m_emitCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable())
713         return true;
714 
715     // Leave element positioned flush with start of a paragraph
716     // (e.g. do not insert tab before a table cell at the start of a paragraph)
717     if (m_lastCharacter == '\n')
718         return false;
719 
720     // Otherwise, show the position if we have emitted any characters
721     if (m_haveEmitted)
722         return true;
723 
724     // We've not emitted anything yet. Generally, there is no need for any positioning then.
725     // The only exception is when the element is visually not in the same line as
726     // the start of the range (e.g. the range starts at the end of the previous paragraph).
727     // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
728     // make quicker checks to possibly avoid that. Another check that we could make is
729     // is whether the inline vs block flow changed since the previous visible element.
730     // I think we're already in a special enough case that that won't be needed, tho.
731 
732     // No character needed if this is the first node in the range.
733     if (m_node == m_startContainer)
734         return false;
735 
736     // If we are outside the start container's subtree, assume we need to emit.
737     // FIXME: m_startContainer could be an inline block
738     if (!m_node->isDescendantOf(m_startContainer))
739         return true;
740 
741     // If we started as m_startContainer offset 0 and the current node is a descendant of
742     // the start container, we already had enough context to correctly decide whether to
743     // emit after a preceding block. We chose not to emit (m_haveEmitted is false),
744     // so don't second guess that now.
745     // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
746     // immaterial since we likely would have already emitted something by now.
747     if (m_startOffset == 0)
748         return false;
749 
750     // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
751     // Additionally, if the range we are iterating over contains huge sections of unrendered content,
752     // we would create VisiblePositions on every call to this function without this check.
753     if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE)
754         return false;
755 
756     // The startPos.isNotNull() check is needed because the start could be before the body,
757     // and in that case we'll get null. We don't want to put in newlines at the start in that case.
758     // The currPos.isNotNull() check is needed because positions in non-HTML content
759     // (like SVG) do not have visible positions, and we don't want to emit for them either.
760     VisiblePosition startPos = VisiblePosition(m_startContainer, m_startOffset, DOWNSTREAM);
761     VisiblePosition currPos = VisiblePosition(m_node, 0, DOWNSTREAM);
762     return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
763 }
764 
shouldEmitSpaceBeforeAndAfterNode(Node * node)765 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
766 {
767     return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitCharactersBetweenAllVisiblePositions);
768 }
769 
representNodeOffsetZero()770 void TextIterator::representNodeOffsetZero()
771 {
772     // Emit a character to show the positioning of m_node.
773 
774     // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
775     // create VisiblePositions, which is expensive.  So, we perform the inexpensive checks
776     // on m_node to see if it necessitates emitting a character first and will early return
777     // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
778     if (shouldEmitTabBeforeNode(m_node)) {
779         if (shouldRepresentNodeOffsetZero())
780             emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
781     } else if (shouldEmitNewlineBeforeNode(m_node)) {
782         if (shouldRepresentNodeOffsetZero())
783             emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
784     } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
785         if (shouldRepresentNodeOffsetZero())
786             emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
787     }
788 }
789 
handleNonTextNode()790 bool TextIterator::handleNonTextNode()
791 {
792     if (shouldEmitNewlineForNode(m_node))
793         emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
794     else if (m_emitCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
795         emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
796     else
797         representNodeOffsetZero();
798 
799     return true;
800 }
801 
exitNode()802 void TextIterator::exitNode()
803 {
804     // prevent emitting a newline when exiting a collapsed block at beginning of the range
805     // FIXME: !m_haveEmitted does not necessarily mean there was a collapsed block... it could
806     // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
807     // therefore look like a blank line.
808     if (!m_haveEmitted)
809         return;
810 
811     // Emit with a position *inside* m_node, after m_node's contents, in
812     // case it is a block, because the run should start where the
813     // emitted character is positioned visually.
814     Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
815     // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
816     // the logic in _web_attributedStringFromRange match.  We'll get that for free when we switch to use
817     // TextIterator in _web_attributedStringFromRange.
818     // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
819     if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) {
820         // use extra newline to represent margin bottom, as needed
821         bool addNewline = shouldEmitExtraNewlineForNode(m_node);
822 
823         // FIXME: We need to emit a '\n' as we leave an empty block(s) that
824         // contain a VisiblePosition when doing selection preservation.
825         if (m_lastCharacter != '\n') {
826             // insert a newline with a position following this block's contents.
827             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
828             // remember whether to later add a newline for the current node
829             ASSERT(!m_needAnotherNewline);
830             m_needAnotherNewline = addNewline;
831         } else if (addNewline)
832             // insert a newline with a position following this block's contents.
833             emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
834     }
835 
836     // If nothing was emitted, see if we need to emit a space.
837     if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
838         emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
839 }
840 
emitCharacter(UChar c,Node * textNode,Node * offsetBaseNode,int textStartOffset,int textEndOffset)841 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
842 {
843     m_haveEmitted = true;
844 
845     // remember information with which to construct the TextIterator::range()
846     // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
847     m_positionNode = textNode;
848     m_positionOffsetBaseNode = offsetBaseNode;
849     m_positionStartOffset = textStartOffset;
850     m_positionEndOffset = textEndOffset;
851 
852     // remember information with which to construct the TextIterator::characters() and length()
853     m_singleCharacterBuffer = c;
854     m_textCharacters = &m_singleCharacterBuffer;
855     m_textLength = 1;
856 
857     // remember some iteration state
858     m_lastTextNodeEndedWithCollapsedSpace = false;
859     m_lastCharacter = c;
860 }
861 
emitText(Node * textNode,int textStartOffset,int textEndOffset)862 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
863 {
864     RenderText* renderer = toRenderText(m_node->renderer());
865     String str = renderer->text();
866     ASSERT(str.characters());
867 
868     m_positionNode = textNode;
869     m_positionOffsetBaseNode = 0;
870     m_positionStartOffset = textStartOffset;
871     m_positionEndOffset = textEndOffset;
872     m_textCharacters = str.characters() + textStartOffset;
873     m_textLength = textEndOffset - textStartOffset;
874     m_lastCharacter = str[textEndOffset - 1];
875 
876     m_lastTextNodeEndedWithCollapsedSpace = false;
877     m_haveEmitted = true;
878 }
879 
range() const880 PassRefPtr<Range> TextIterator::range() const
881 {
882     // use the current run information, if we have it
883     if (m_positionNode) {
884         if (m_positionOffsetBaseNode) {
885             int index = m_positionOffsetBaseNode->nodeIndex();
886             m_positionStartOffset += index;
887             m_positionEndOffset += index;
888             m_positionOffsetBaseNode = 0;
889         }
890         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
891     }
892 
893     // otherwise, return the end of the overall range we were given
894     if (m_endContainer)
895         return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
896 
897     return 0;
898 }
899 
node() const900 Node* TextIterator::node() const
901 {
902     RefPtr<Range> textRange = range();
903     if (!textRange)
904         return 0;
905 
906     Node* node = textRange->startContainer();
907     if (!node)
908         return 0;
909     if (node->offsetInCharacters())
910         return node;
911 
912     return node->childNode(textRange->startOffset());
913 }
914 
915 // --------
916 
SimplifiedBackwardsTextIterator()917 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator()
918     : m_positionNode(0)
919 {
920 }
921 
SimplifiedBackwardsTextIterator(const Range * r)922 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r)
923     : m_positionNode(0)
924 {
925     if (!r)
926         return;
927 
928     Node* startNode = r->startContainer();
929     if (!startNode)
930         return;
931     Node* endNode = r->endContainer();
932     int startOffset = r->startOffset();
933     int endOffset = r->endOffset();
934 
935     if (!startNode->offsetInCharacters()) {
936         if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
937             startNode = startNode->childNode(startOffset);
938             startOffset = 0;
939         }
940     }
941     if (!endNode->offsetInCharacters()) {
942         if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
943             endNode = endNode->childNode(endOffset - 1);
944             endOffset = endNode->offsetInCharacters() ? endNode->maxCharacterOffset() : endNode->childNodeCount();
945         }
946     }
947 
948     m_node = endNode;
949     setUpFullyClippedStack(m_fullyClippedStack, m_node);
950     m_offset = endOffset;
951     m_handledNode = false;
952     m_handledChildren = endOffset == 0;
953 
954     m_startNode = startNode;
955     m_startOffset = startOffset;
956     m_endNode = endNode;
957     m_endOffset = endOffset;
958 
959 #ifndef NDEBUG
960     // Need this just because of the assert.
961     m_positionNode = endNode;
962 #endif
963 
964     m_lastTextNode = 0;
965     m_lastCharacter = '\n';
966 
967     m_pastStartNode = previousInPostOrderCrossingShadowBoundaries(startNode, startOffset);
968 
969     advance();
970 }
971 
advance()972 void SimplifiedBackwardsTextIterator::advance()
973 {
974     ASSERT(m_positionNode);
975 
976     m_positionNode = 0;
977     m_textLength = 0;
978 
979     while (m_node && m_node != m_pastStartNode) {
980         // Don't handle node if we start iterating at [node, 0].
981         if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) {
982             RenderObject* renderer = m_node->renderer();
983             if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
984                 // FIXME: What about CDATA_SECTION_NODE?
985                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
986                     m_handledNode = handleTextNode();
987             } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
988                 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
989                     m_handledNode = handleReplacedElement();
990             } else
991                 m_handledNode = handleNonTextNode();
992             if (m_positionNode)
993                 return;
994         }
995 
996         Node* next = m_handledChildren ? 0 : m_node->lastChild();
997         if (!next) {
998             // Exit empty containers as we pass over them or containers
999             // where [container, 0] is where we started iterating.
1000             if (!m_handledNode &&
1001                 canHaveChildrenForEditing(m_node) &&
1002                 m_node->parentNode() &&
1003                 (!m_node->lastChild() || (m_node == m_endNode && m_endOffset == 0))) {
1004                 exitNode();
1005                 if (m_positionNode) {
1006                     m_handledNode = true;
1007                     m_handledChildren = true;
1008                     return;
1009                 }
1010             }
1011             // Exit all other containers.
1012             next = m_node->previousSibling();
1013             while (!next) {
1014                 Node* parentNode = parentCrossingShadowBoundaries(m_node);
1015                 if (!parentNode)
1016                     break;
1017                 m_node = parentNode;
1018                 m_fullyClippedStack.pop();
1019                 exitNode();
1020                 if (m_positionNode) {
1021                     m_handledNode = true;
1022                     m_handledChildren = true;
1023                     return;
1024                 }
1025                 next = m_node->previousSibling();
1026             }
1027             m_fullyClippedStack.pop();
1028         }
1029 
1030         m_node = next;
1031         if (m_node)
1032             pushFullyClippedState(m_fullyClippedStack, m_node);
1033         m_offset = m_node ? caretMaxOffset(m_node) : 0;
1034         m_handledNode = false;
1035         m_handledChildren = false;
1036 
1037         if (m_positionNode)
1038             return;
1039     }
1040 }
1041 
handleTextNode()1042 bool SimplifiedBackwardsTextIterator::handleTextNode()
1043 {
1044     m_lastTextNode = m_node;
1045 
1046     RenderText* renderer = toRenderText(m_node->renderer());
1047     String str = renderer->text();
1048 
1049     if (!renderer->firstTextBox() && str.length() > 0)
1050         return true;
1051 
1052     m_positionEndOffset = m_offset;
1053 
1054     m_offset = (m_node == m_startNode) ? m_startOffset : 0;
1055     m_positionNode = m_node;
1056     m_positionStartOffset = m_offset;
1057     m_textLength = m_positionEndOffset - m_positionStartOffset;
1058     m_textCharacters = str.characters() + m_positionStartOffset;
1059 
1060     m_lastCharacter = str[m_positionEndOffset - 1];
1061 
1062     return true;
1063 }
1064 
handleReplacedElement()1065 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
1066 {
1067     unsigned index = m_node->nodeIndex();
1068     // We want replaced elements to behave like punctuation for boundary
1069     // finding, and to simply take up space for the selection preservation
1070     // code in moveParagraphs, so we use a comma.  Unconditionally emit
1071     // here because this iterator is only used for boundary finding.
1072     emitCharacter(',', m_node->parentNode(), index, index + 1);
1073     return true;
1074 }
1075 
handleNonTextNode()1076 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
1077 {
1078     // We can use a linefeed in place of a tab because this simple iterator is only used to
1079     // find boundaries, not actual content.  A linefeed breaks words, sentences, and paragraphs.
1080     if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
1081         unsigned index = m_node->nodeIndex();
1082         // The start of this emitted range is wrong. Ensuring correctness would require
1083         // VisiblePositions and so would be slow. previousBoundary expects this.
1084         emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
1085     }
1086     return true;
1087 }
1088 
exitNode()1089 void SimplifiedBackwardsTextIterator::exitNode()
1090 {
1091     if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
1092         // The start of this emitted range is wrong. Ensuring correctness would require
1093         // VisiblePositions and so would be slow. previousBoundary expects this.
1094         emitCharacter('\n', m_node, 0, 0);
1095     }
1096 }
1097 
emitCharacter(UChar c,Node * node,int startOffset,int endOffset)1098 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
1099 {
1100     m_singleCharacterBuffer = c;
1101     m_positionNode = node;
1102     m_positionStartOffset = startOffset;
1103     m_positionEndOffset = endOffset;
1104     m_textCharacters = &m_singleCharacterBuffer;
1105     m_textLength = 1;
1106     m_lastCharacter = c;
1107 }
1108 
range() const1109 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
1110 {
1111     if (m_positionNode)
1112         return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1113 
1114     return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
1115 }
1116 
1117 // --------
1118 
CharacterIterator()1119 CharacterIterator::CharacterIterator()
1120     : m_offset(0)
1121     , m_runOffset(0)
1122     , m_atBreak(true)
1123 {
1124 }
1125 
CharacterIterator(const Range * r,bool emitCharactersBetweenAllVisiblePositions,bool enterTextControls)1126 CharacterIterator::CharacterIterator(const Range* r, bool emitCharactersBetweenAllVisiblePositions, bool enterTextControls)
1127     : m_offset(0)
1128     , m_runOffset(0)
1129     , m_atBreak(true)
1130     , m_textIterator(r, emitCharactersBetweenAllVisiblePositions, enterTextControls)
1131 {
1132     while (!atEnd() && m_textIterator.length() == 0)
1133         m_textIterator.advance();
1134 }
1135 
range() const1136 PassRefPtr<Range> CharacterIterator::range() const
1137 {
1138     RefPtr<Range> r = m_textIterator.range();
1139     if (!m_textIterator.atEnd()) {
1140         if (m_textIterator.length() <= 1) {
1141             ASSERT(m_runOffset == 0);
1142         } else {
1143             Node* n = r->startContainer();
1144             ASSERT(n == r->endContainer());
1145             int offset = r->startOffset() + m_runOffset;
1146             ExceptionCode ec = 0;
1147             r->setStart(n, offset, ec);
1148             r->setEnd(n, offset + 1, ec);
1149             ASSERT(!ec);
1150         }
1151     }
1152     return r.release();
1153 }
1154 
advance(int count)1155 void CharacterIterator::advance(int count)
1156 {
1157     if (count <= 0) {
1158         ASSERT(count == 0);
1159         return;
1160     }
1161 
1162     m_atBreak = false;
1163 
1164     // easy if there is enough left in the current m_textIterator run
1165     int remaining = m_textIterator.length() - m_runOffset;
1166     if (count < remaining) {
1167         m_runOffset += count;
1168         m_offset += count;
1169         return;
1170     }
1171 
1172     // exhaust the current m_textIterator run
1173     count -= remaining;
1174     m_offset += remaining;
1175 
1176     // move to a subsequent m_textIterator run
1177     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1178         int runLength = m_textIterator.length();
1179         if (runLength == 0)
1180             m_atBreak = true;
1181         else {
1182             // see whether this is m_textIterator to use
1183             if (count < runLength) {
1184                 m_runOffset = count;
1185                 m_offset += count;
1186                 return;
1187             }
1188 
1189             // exhaust this m_textIterator run
1190             count -= runLength;
1191             m_offset += runLength;
1192         }
1193     }
1194 
1195     // ran to the end of the m_textIterator... no more runs left
1196     m_atBreak = true;
1197     m_runOffset = 0;
1198 }
1199 
string(int numChars)1200 String CharacterIterator::string(int numChars)
1201 {
1202     Vector<UChar> result;
1203     result.reserveInitialCapacity(numChars);
1204     while (numChars > 0 && !atEnd()) {
1205         int runSize = min(numChars, length());
1206         result.append(characters(), runSize);
1207         numChars -= runSize;
1208         advance(runSize);
1209     }
1210     return String::adopt(result);
1211 }
1212 
characterSubrange(CharacterIterator & it,int offset,int length)1213 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
1214 {
1215     it.advance(offset);
1216     RefPtr<Range> start = it.range();
1217 
1218     if (length > 1)
1219         it.advance(length - 1);
1220     RefPtr<Range> end = it.range();
1221 
1222     return Range::create(start->startContainer()->document(),
1223         start->startContainer(), start->startOffset(),
1224         end->endContainer(), end->endOffset());
1225 }
1226 
BackwardsCharacterIterator()1227 BackwardsCharacterIterator::BackwardsCharacterIterator()
1228     : m_offset(0)
1229     , m_runOffset(0)
1230     , m_atBreak(true)
1231 {
1232 }
1233 
BackwardsCharacterIterator(const Range * range)1234 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range)
1235     : m_offset(0)
1236     , m_runOffset(0)
1237     , m_atBreak(true)
1238     , m_textIterator(range)
1239 {
1240     while (!atEnd() && !m_textIterator.length())
1241         m_textIterator.advance();
1242 }
1243 
range() const1244 PassRefPtr<Range> BackwardsCharacterIterator::range() const
1245 {
1246     RefPtr<Range> r = m_textIterator.range();
1247     if (!m_textIterator.atEnd()) {
1248         if (m_textIterator.length() <= 1)
1249             ASSERT(m_runOffset == 0);
1250         else {
1251             Node* n = r->startContainer();
1252             ASSERT(n == r->endContainer());
1253             int offset = r->endOffset() - m_runOffset;
1254             ExceptionCode ec = 0;
1255             r->setStart(n, offset - 1, ec);
1256             r->setEnd(n, offset, ec);
1257             ASSERT(!ec);
1258         }
1259     }
1260     return r.release();
1261 }
1262 
advance(int count)1263 void BackwardsCharacterIterator::advance(int count)
1264 {
1265     if (count <= 0) {
1266         ASSERT(!count);
1267         return;
1268     }
1269 
1270     m_atBreak = false;
1271 
1272     int remaining = m_textIterator.length() - m_runOffset;
1273     if (count < remaining) {
1274         m_runOffset += count;
1275         m_offset += count;
1276         return;
1277     }
1278 
1279     count -= remaining;
1280     m_offset += remaining;
1281 
1282     for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1283         int runLength = m_textIterator.length();
1284         if (runLength == 0)
1285             m_atBreak = true;
1286         else {
1287             if (count < runLength) {
1288                 m_runOffset = count;
1289                 m_offset += count;
1290                 return;
1291             }
1292 
1293             count -= runLength;
1294             m_offset += runLength;
1295         }
1296     }
1297 
1298     m_atBreak = true;
1299     m_runOffset = 0;
1300 }
1301 
1302 // --------
1303 
WordAwareIterator()1304 WordAwareIterator::WordAwareIterator()
1305     : m_previousText(0)
1306     , m_didLookAhead(false)
1307 {
1308 }
1309 
WordAwareIterator(const Range * r)1310 WordAwareIterator::WordAwareIterator(const Range* r)
1311     : m_previousText(0)
1312     , m_didLookAhead(true) // so we consider the first chunk from the text iterator
1313     , m_textIterator(r)
1314 {
1315     advance(); // get in position over the first chunk of text
1316 }
1317 
1318 // We're always in one of these modes:
1319 // - The current chunk in the text iterator is our current chunk
1320 //      (typically its a piece of whitespace, or text that ended with whitespace)
1321 // - The previous chunk in the text iterator is our current chunk
1322 //      (we looked ahead to the next chunk and found a word boundary)
1323 // - We built up our own chunk of text from many chunks from the text iterator
1324 
1325 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
1326 
advance()1327 void WordAwareIterator::advance()
1328 {
1329     m_previousText = 0;
1330     m_buffer.clear();      // toss any old buffer we built up
1331 
1332     // If last time we did a look-ahead, start with that looked-ahead chunk now
1333     if (!m_didLookAhead) {
1334         ASSERT(!m_textIterator.atEnd());
1335         m_textIterator.advance();
1336     }
1337     m_didLookAhead = false;
1338 
1339     // Go to next non-empty chunk
1340     while (!m_textIterator.atEnd() && m_textIterator.length() == 0)
1341         m_textIterator.advance();
1342     m_range = m_textIterator.range();
1343 
1344     if (m_textIterator.atEnd())
1345         return;
1346 
1347     while (1) {
1348         // If this chunk ends in whitespace we can just use it as our chunk.
1349         if (isSpaceOrNewline(m_textIterator.characters()[m_textIterator.length() - 1]))
1350             return;
1351 
1352         // If this is the first chunk that failed, save it in previousText before look ahead
1353         if (m_buffer.isEmpty()) {
1354             m_previousText = m_textIterator.characters();
1355             m_previousLength = m_textIterator.length();
1356         }
1357 
1358         // Look ahead to next chunk.  If it is whitespace or a break, we can use the previous stuff
1359         m_textIterator.advance();
1360         if (m_textIterator.atEnd() || m_textIterator.length() == 0 || isSpaceOrNewline(m_textIterator.characters()[0])) {
1361             m_didLookAhead = true;
1362             return;
1363         }
1364 
1365         if (m_buffer.isEmpty()) {
1366             // Start gobbling chunks until we get to a suitable stopping point
1367             m_buffer.append(m_previousText, m_previousLength);
1368             m_previousText = 0;
1369         }
1370         m_buffer.append(m_textIterator.characters(), m_textIterator.length());
1371         int exception = 0;
1372         m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception);
1373     }
1374 }
1375 
length() const1376 int WordAwareIterator::length() const
1377 {
1378     if (!m_buffer.isEmpty())
1379         return m_buffer.size();
1380     if (m_previousText)
1381         return m_previousLength;
1382     return m_textIterator.length();
1383 }
1384 
characters() const1385 const UChar* WordAwareIterator::characters() const
1386 {
1387     if (!m_buffer.isEmpty())
1388         return m_buffer.data();
1389     if (m_previousText)
1390         return m_previousText;
1391     return m_textIterator.characters();
1392 }
1393 
1394 // --------
1395 
foldQuoteMark(UChar c)1396 static inline UChar foldQuoteMark(UChar c)
1397 {
1398     switch (c) {
1399         case hebrewPunctuationGershayim:
1400         case leftDoubleQuotationMark:
1401         case rightDoubleQuotationMark:
1402             return '"';
1403         case hebrewPunctuationGeresh:
1404         case leftSingleQuotationMark:
1405         case rightSingleQuotationMark:
1406             return '\'';
1407         default:
1408             return c;
1409     }
1410 }
1411 
foldQuoteMarks(String & s)1412 static inline void foldQuoteMarks(String& s)
1413 {
1414     s.replace(hebrewPunctuationGeresh, '\'');
1415     s.replace(hebrewPunctuationGershayim, '"');
1416     s.replace(leftDoubleQuotationMark, '"');
1417     s.replace(leftSingleQuotationMark, '\'');
1418     s.replace(rightDoubleQuotationMark, '"');
1419     s.replace(rightSingleQuotationMark, '\'');
1420 }
1421 
1422 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
1423 
foldQuoteMarks(UChar * data,size_t length)1424 static inline void foldQuoteMarks(UChar* data, size_t length)
1425 {
1426     for (size_t i = 0; i < length; ++i)
1427         data[i] = foldQuoteMark(data[i]);
1428 }
1429 
1430 static const size_t minimumSearchBufferSize = 8192;
1431 
1432 #ifndef NDEBUG
1433 static bool searcherInUse;
1434 #endif
1435 
createSearcher()1436 static UStringSearch* createSearcher()
1437 {
1438     // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
1439     // but it doesn't matter exactly what it is, since we don't perform any searches
1440     // without setting both the pattern and the text.
1441     UErrorCode status = U_ZERO_ERROR;
1442     UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status);
1443     ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1444     return searcher;
1445 }
1446 
searcher()1447 static UStringSearch* searcher()
1448 {
1449     static UStringSearch* searcher = createSearcher();
1450     return searcher;
1451 }
1452 
lockSearcher()1453 static inline void lockSearcher()
1454 {
1455 #ifndef NDEBUG
1456     ASSERT(!searcherInUse);
1457     searcherInUse = true;
1458 #endif
1459 }
1460 
unlockSearcher()1461 static inline void unlockSearcher()
1462 {
1463 #ifndef NDEBUG
1464     ASSERT(searcherInUse);
1465     searcherInUse = false;
1466 #endif
1467 }
1468 
SearchBuffer(const String & target,bool isCaseSensitive)1469 inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
1470     : m_target(target)
1471     , m_atBreak(true)
1472 {
1473     ASSERT(!m_target.isEmpty());
1474 
1475     // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
1476     // of doing it in a separate replacement pass here, but ICU doesn't offer a way
1477     // to add tailoring on top of the locale-specific tailoring as of this writing.
1478     foldQuoteMarks(m_target);
1479 
1480     size_t targetLength = m_target.length();
1481     m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize));
1482     m_overlap = m_buffer.capacity() / 4;
1483 
1484     // Grab the single global searcher.
1485     // If we ever have a reason to do more than once search buffer at once, we'll have
1486     // to move to multiple searchers.
1487     lockSearcher();
1488 
1489     UStringSearch* searcher = WebCore::searcher();
1490     UCollator* collator = usearch_getCollator(searcher);
1491 
1492     UCollationStrength strength = isCaseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY;
1493     if (ucol_getStrength(collator) != strength) {
1494         ucol_setStrength(collator, strength);
1495         usearch_reset(searcher);
1496     }
1497 
1498     UErrorCode status = U_ZERO_ERROR;
1499     usearch_setPattern(searcher, m_target.characters(), targetLength, &status);
1500     ASSERT(status == U_ZERO_ERROR);
1501 }
1502 
~SearchBuffer()1503 inline SearchBuffer::~SearchBuffer()
1504 {
1505     unlockSearcher();
1506 }
1507 
append(const UChar * characters,size_t length)1508 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
1509 {
1510     ASSERT(length);
1511 
1512     if (m_atBreak) {
1513         m_buffer.shrink(0);
1514         m_atBreak = false;
1515     } else if (m_buffer.size() == m_buffer.capacity()) {
1516         memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
1517         m_buffer.shrink(m_overlap);
1518     }
1519 
1520     size_t oldLength = m_buffer.size();
1521     size_t usableLength = min(m_buffer.capacity() - oldLength, length);
1522     ASSERT(usableLength);
1523     m_buffer.append(characters, usableLength);
1524     foldQuoteMarks(m_buffer.data() + oldLength, usableLength);
1525     return usableLength;
1526 }
1527 
atBreak() const1528 inline bool SearchBuffer::atBreak() const
1529 {
1530     return m_atBreak;
1531 }
1532 
reachedBreak()1533 inline void SearchBuffer::reachedBreak()
1534 {
1535     m_atBreak = true;
1536 }
1537 
search(size_t & start)1538 inline size_t SearchBuffer::search(size_t& start)
1539 {
1540     size_t size = m_buffer.size();
1541     if (m_atBreak) {
1542         if (!size)
1543             return 0;
1544     } else {
1545         if (size != m_buffer.capacity())
1546             return 0;
1547     }
1548 
1549     UStringSearch* searcher = WebCore::searcher();
1550 
1551     UErrorCode status = U_ZERO_ERROR;
1552     usearch_setText(searcher, m_buffer.data(), size, &status);
1553     ASSERT(status == U_ZERO_ERROR);
1554 
1555     int matchStart = usearch_first(searcher, &status);
1556     ASSERT(status == U_ZERO_ERROR);
1557     if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
1558         ASSERT(matchStart == USEARCH_DONE);
1559         return 0;
1560     }
1561 
1562     // Matches that start in the overlap area are only tentative.
1563     // The same match may appear later, matching more characters,
1564     // possibly including a combining character that's not yet in the buffer.
1565     if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
1566         memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar));
1567         m_buffer.shrink(m_overlap);
1568         return 0;
1569     }
1570 
1571     size_t newSize = size - (matchStart + 1);
1572     memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
1573     m_buffer.shrink(newSize);
1574 
1575     start = size - matchStart;
1576     return usearch_getMatchedLength(searcher);
1577 }
1578 
1579 #else // !ICU_UNICODE
1580 
SearchBuffer(const String & target,bool isCaseSensitive)1581 inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
1582     : m_target(isCaseSensitive ? target : target.foldCase())
1583     , m_isCaseSensitive(isCaseSensitive)
1584     , m_buffer(m_target.length())
1585     , m_isCharacterStartBuffer(m_target.length())
1586     , m_isBufferFull(false)
1587     , m_cursor(0)
1588 {
1589     ASSERT(!m_target.isEmpty());
1590     m_target.replace(noBreakSpace, ' ');
1591     foldQuoteMarks(m_target);
1592 }
1593 
~SearchBuffer()1594 inline SearchBuffer::~SearchBuffer()
1595 {
1596 }
1597 
reachedBreak()1598 inline void SearchBuffer::reachedBreak()
1599 {
1600     m_cursor = 0;
1601     m_isBufferFull = false;
1602 }
1603 
atBreak() const1604 inline bool SearchBuffer::atBreak() const
1605 {
1606     return !m_cursor && !m_isBufferFull;
1607 }
1608 
append(UChar c,bool isStart)1609 inline void SearchBuffer::append(UChar c, bool isStart)
1610 {
1611     m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMark(c);
1612     m_isCharacterStartBuffer[m_cursor] = isStart;
1613     if (++m_cursor == m_target.length()) {
1614         m_cursor = 0;
1615         m_isBufferFull = true;
1616     }
1617 }
1618 
append(const UChar * characters,size_t length)1619 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
1620 {
1621     ASSERT(length);
1622     if (m_isCaseSensitive) {
1623         append(characters[0], true);
1624         return 1;
1625     }
1626     const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough
1627     UChar foldedCharacters[maxFoldedCharacters];
1628     bool error;
1629     int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error);
1630     ASSERT(!error);
1631     ASSERT(numFoldedCharacters);
1632     ASSERT(numFoldedCharacters <= maxFoldedCharacters);
1633     if (!error && numFoldedCharacters) {
1634         numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters);
1635         append(foldedCharacters[0], true);
1636         for (int i = 1; i < numFoldedCharacters; ++i)
1637             append(foldedCharacters[i], false);
1638     }
1639     return 1;
1640 }
1641 
search(size_t & start)1642 inline size_t SearchBuffer::search(size_t& start)
1643 {
1644     if (!m_isBufferFull)
1645         return 0;
1646     if (!m_isCharacterStartBuffer[m_cursor])
1647         return 0;
1648 
1649     size_t tailSpace = m_target.length() - m_cursor;
1650     if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0)
1651         return 0;
1652     if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0)
1653         return 0;
1654 
1655     start = length();
1656 
1657     // Now that we've found a match once, we don't want to find it again, because those
1658     // are the SearchBuffer semantics, allowing for a buffer where you append more than one
1659     // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if
1660     // we want to get rid of that in the future we could track this with a separate boolean
1661     // or even move the characters to the start of the buffer and set m_isBufferFull to false.
1662     m_isCharacterStartBuffer[m_cursor] = false;
1663 
1664     return start;
1665 }
1666 
1667 // Returns the number of characters that were appended to the buffer (what we are searching in).
1668 // That's not necessarily the same length as the passed-in target string, because case folding
1669 // can make two strings match even though they're not the same length.
length() const1670 size_t SearchBuffer::length() const
1671 {
1672     size_t bufferSize = m_target.length();
1673     size_t length = 0;
1674     for (size_t i = 0; i < bufferSize; ++i)
1675         length += m_isCharacterStartBuffer[i];
1676     return length;
1677 }
1678 
1679 #endif // !ICU_UNICODE
1680 
1681 // --------
1682 
rangeLength(const Range * r,bool forSelectionPreservation)1683 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
1684 {
1685     int length = 0;
1686     for (TextIterator it(r, forSelectionPreservation); !it.atEnd(); it.advance())
1687         length += it.length();
1688 
1689     return length;
1690 }
1691 
subrange(Range * entireRange,int characterOffset,int characterCount)1692 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
1693 {
1694     CharacterIterator entireRangeIterator(entireRange);
1695     return characterSubrange(entireRangeIterator, characterOffset, characterCount);
1696 }
1697 
rangeFromLocationAndLength(Element * scope,int rangeLocation,int rangeLength,bool forSelectionPreservation)1698 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
1699 {
1700     RefPtr<Range> resultRange = scope->document()->createRange();
1701 
1702     int docTextPosition = 0;
1703     int rangeEnd = rangeLocation + rangeLength;
1704     bool startRangeFound = false;
1705 
1706     RefPtr<Range> textRunRange;
1707 
1708     TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation);
1709 
1710     // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
1711     if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
1712         textRunRange = it.range();
1713 
1714         ExceptionCode ec = 0;
1715         resultRange->setStart(textRunRange->startContainer(), 0, ec);
1716         ASSERT(!ec);
1717         resultRange->setEnd(textRunRange->startContainer(), 0, ec);
1718         ASSERT(!ec);
1719 
1720         return resultRange.release();
1721     }
1722 
1723     for (; !it.atEnd(); it.advance()) {
1724         int len = it.length();
1725         textRunRange = it.range();
1726 
1727         bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len;
1728         bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len;
1729 
1730         // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only
1731         // in those cases that textRunRange is used.
1732         if (foundStart || foundEnd) {
1733             // FIXME: This is a workaround for the fact that the end of a run is often at the wrong
1734             // position for emitted '\n's.
1735             if (len == 1 && it.characters()[0] == '\n') {
1736                 Position runStart = textRunRange->startPosition();
1737                 Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
1738                 if (runEnd.isNotNull()) {
1739                     ExceptionCode ec = 0;
1740                     textRunRange->setEnd(runEnd.node(), runEnd.deprecatedEditingOffset(), ec);
1741                     ASSERT(!ec);
1742                 }
1743             }
1744         }
1745 
1746         if (foundStart) {
1747             startRangeFound = true;
1748             int exception = 0;
1749             if (textRunRange->startContainer()->isTextNode()) {
1750                 int offset = rangeLocation - docTextPosition;
1751                 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
1752             } else {
1753                 if (rangeLocation == docTextPosition)
1754                     resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception);
1755                 else
1756                     resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception);
1757             }
1758         }
1759 
1760         if (foundEnd) {
1761             int exception = 0;
1762             if (textRunRange->startContainer()->isTextNode()) {
1763                 int offset = rangeEnd - docTextPosition;
1764                 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
1765             } else {
1766                 if (rangeEnd == docTextPosition)
1767                     resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception);
1768                 else
1769                     resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
1770             }
1771             docTextPosition += len;
1772             break;
1773         }
1774         docTextPosition += len;
1775     }
1776 
1777     if (!startRangeFound)
1778         return 0;
1779 
1780     if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
1781         int exception = 0;
1782         resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
1783     }
1784 
1785     return resultRange.release();
1786 }
1787 
1788 // --------
1789 
plainTextToMallocAllocatedBuffer(const Range * r,unsigned & bufferLength,bool isDisplayString)1790 UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString)
1791 {
1792     UChar* result = 0;
1793 
1794     // Do this in pieces to avoid massive reallocations if there is a large amount of text.
1795     // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS.
1796     static const unsigned cMaxSegmentSize = 1 << 16;
1797     bufferLength = 0;
1798     typedef pair<UChar*, unsigned> TextSegment;
1799     Vector<TextSegment>* textSegments = 0;
1800     Vector<UChar> textBuffer;
1801     textBuffer.reserveInitialCapacity(cMaxSegmentSize);
1802     for (TextIterator it(r); !it.atEnd(); it.advance()) {
1803         if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) {
1804             UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar)));
1805             if (!newSegmentBuffer)
1806                 goto exit;
1807             memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar));
1808             if (!textSegments)
1809                 textSegments = new Vector<TextSegment>;
1810             textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size()));
1811             textBuffer.clear();
1812         }
1813         textBuffer.append(it.characters(), it.length());
1814         bufferLength += it.length();
1815     }
1816 
1817     if (!bufferLength)
1818         return 0;
1819 
1820     // Since we know the size now, we can make a single buffer out of the pieces with one big alloc
1821     result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar)));
1822     if (!result)
1823         goto exit;
1824 
1825     {
1826         UChar* resultPos = result;
1827         if (textSegments) {
1828             unsigned size = textSegments->size();
1829             for (unsigned i = 0; i < size; ++i) {
1830                 const TextSegment& segment = textSegments->at(i);
1831                 memcpy(resultPos, segment.first, segment.second * sizeof(UChar));
1832                 resultPos += segment.second;
1833             }
1834         }
1835         memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar));
1836     }
1837 
1838 exit:
1839     if (textSegments) {
1840         unsigned size = textSegments->size();
1841         for (unsigned i = 0; i < size; ++i)
1842             free(textSegments->at(i).first);
1843         delete textSegments;
1844     }
1845 
1846     if (isDisplayString && r->ownerDocument())
1847         r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength);
1848 
1849     return result;
1850 }
1851 
plainText(const Range * r)1852 String plainText(const Range* r)
1853 {
1854     unsigned length;
1855     UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false);
1856     if (!buf)
1857         return "";
1858     String result(buf, length);
1859     free(buf);
1860     return result;
1861 }
1862 
isAllCollapsibleWhitespace(const String & string)1863 static inline bool isAllCollapsibleWhitespace(const String& string)
1864 {
1865     const UChar* characters = string.characters();
1866     unsigned length = string.length();
1867     for (unsigned i = 0; i < length; ++i) {
1868         if (!isCollapsibleWhitespace(characters[i]))
1869             return false;
1870     }
1871     return true;
1872 }
1873 
collapsedToBoundary(const Range * range,bool forward)1874 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
1875 {
1876     ExceptionCode ec = 0;
1877     RefPtr<Range> result = range->cloneRange(ec);
1878     ASSERT(!ec);
1879     result->collapse(!forward, ec);
1880     ASSERT(!ec);
1881     return result.release();
1882 }
1883 
findPlainText(CharacterIterator & it,const String & target,bool forward,bool caseSensitive,size_t & matchStart)1884 static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart)
1885 {
1886     matchStart = 0;
1887     size_t matchLength = 0;
1888 
1889     SearchBuffer buffer(target, caseSensitive);
1890 
1891     while (!it.atEnd()) {
1892         it.advance(buffer.append(it.characters(), it.length()));
1893 tryAgain:
1894         size_t matchStartOffset;
1895         if (size_t newMatchLength = buffer.search(matchStartOffset)) {
1896             // Note that we found a match, and where we found it.
1897             size_t lastCharacterInBufferOffset = it.characterOffset();
1898             ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
1899             matchStart = lastCharacterInBufferOffset - matchStartOffset;
1900             matchLength = newMatchLength;
1901             // If searching forward, stop on the first match.
1902             // If searching backward, don't stop, so we end up with the last match.
1903             if (forward)
1904                 break;
1905             goto tryAgain;
1906         }
1907         if (it.atBreak() && !buffer.atBreak()) {
1908             buffer.reachedBreak();
1909             goto tryAgain;
1910         }
1911     }
1912 
1913     return matchLength;
1914 }
1915 
findPlainText(const Range * range,const String & target,bool forward,bool caseSensitive)1916 PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive)
1917 {
1918     // First, find the text.
1919     size_t matchStart;
1920     size_t matchLength;
1921     {
1922         CharacterIterator findIterator(range, false, true);
1923         matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart);
1924         if (!matchLength)
1925             return collapsedToBoundary(range, forward);
1926     }
1927 
1928     // Then, find the document position of the start and the end of the text.
1929     CharacterIterator computeRangeIterator(range, false, true);
1930     return characterSubrange(computeRangeIterator, matchStart, matchLength);
1931 }
1932 
1933 }
1934