1 /*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2005 Alexey Proskuryakov.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "TextIterator.h"
29
30 #include "CharacterNames.h"
31 #include "Document.h"
32 #include "HTMLElement.h"
33 #include "HTMLNames.h"
34 #include "htmlediting.h"
35 #include "InlineTextBox.h"
36 #include "Position.h"
37 #include "Range.h"
38 #include "RenderTableCell.h"
39 #include "RenderTableRow.h"
40 #include "RenderTextControl.h"
41 #include "VisiblePosition.h"
42 #include "visible_units.h"
43
44 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
45 #include "TextBreakIteratorInternalICU.h"
46 #include <unicode/usearch.h>
47 #endif
48
49 using namespace WTF::Unicode;
50 using namespace std;
51
52 namespace WebCore {
53
54 using namespace HTMLNames;
55
56 // Buffer that knows how to compare with a search target.
57 // Keeps enough of the previous text to be able to search in the future, but no more.
58 // Non-breaking spaces are always equal to normal spaces.
59 // Case folding is also done if <isCaseSensitive> is false.
60 class SearchBuffer : public Noncopyable {
61 public:
62 SearchBuffer(const String& target, bool isCaseSensitive);
63 ~SearchBuffer();
64
65 // Returns number of characters appended; guaranteed to be in the range [1, length].
66 size_t append(const UChar*, size_t length);
67 void reachedBreak();
68
69 // Result is the size in characters of what was found.
70 // And <startOffset> is the number of characters back to the start of what was found.
71 size_t search(size_t& startOffset);
72 bool atBreak() const;
73
74 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
75
76 private:
77 String m_target;
78 Vector<UChar> m_buffer;
79 size_t m_overlap;
80 bool m_atBreak;
81
82 #else
83
84 private:
85 void append(UChar, bool isCharacterStart);
86 size_t length() const;
87
88 String m_target;
89 bool m_isCaseSensitive;
90
91 Vector<UChar> m_buffer;
92 Vector<bool> m_isCharacterStartBuffer;
93 bool m_isBufferFull;
94 size_t m_cursor;
95
96 #endif
97 };
98
99 // --------
100
101 static const unsigned bitsInWord = sizeof(unsigned) * 8;
102 static const unsigned bitInWordMask = bitsInWord - 1;
103
BitStack()104 BitStack::BitStack()
105 : m_size(0)
106 {
107 }
108
push(bool bit)109 void BitStack::push(bool bit)
110 {
111 unsigned index = m_size / bitsInWord;
112 unsigned shift = m_size & bitInWordMask;
113 if (!shift && index == m_words.size()) {
114 m_words.grow(index + 1);
115 m_words[index] = 0;
116 }
117 unsigned& word = m_words[index];
118 unsigned mask = 1U << shift;
119 if (bit)
120 word |= mask;
121 else
122 word &= ~mask;
123 ++m_size;
124 }
125
pop()126 void BitStack::pop()
127 {
128 if (m_size)
129 --m_size;
130 }
131
top() const132 bool BitStack::top() const
133 {
134 if (!m_size)
135 return false;
136 unsigned shift = (m_size - 1) & bitInWordMask;
137 return m_words.last() & (1U << shift);
138 }
139
size() const140 unsigned BitStack::size() const
141 {
142 return m_size;
143 }
144
145 // --------
146
parentCrossingShadowBoundaries(Node * node)147 static inline Node* parentCrossingShadowBoundaries(Node* node)
148 {
149 if (Node* parent = node->parentNode())
150 return parent;
151 return node->shadowParentNode();
152 }
153
154 #ifndef NDEBUG
155
depthCrossingShadowBoundaries(Node * node)156 static unsigned depthCrossingShadowBoundaries(Node* node)
157 {
158 unsigned depth = 0;
159 for (Node* parent = parentCrossingShadowBoundaries(node); parent; parent = parentCrossingShadowBoundaries(parent))
160 ++depth;
161 return depth;
162 }
163
164 #endif
165
166 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
nextInPreOrderCrossingShadowBoundaries(Node * rangeEndContainer,int rangeEndOffset)167 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
168 {
169 if (!rangeEndContainer)
170 return 0;
171 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
172 if (Node* next = rangeEndContainer->childNode(rangeEndOffset))
173 return next;
174 }
175 for (Node* node = rangeEndContainer; node; node = parentCrossingShadowBoundaries(node)) {
176 if (Node* next = node->nextSibling())
177 return next;
178 }
179 return 0;
180 }
181
previousInPostOrderCrossingShadowBoundaries(Node * rangeStartContainer,int rangeStartOffset)182 static Node* previousInPostOrderCrossingShadowBoundaries(Node* rangeStartContainer, int rangeStartOffset)
183 {
184 if (!rangeStartContainer)
185 return 0;
186 if (rangeStartOffset > 0 && !rangeStartContainer->offsetInCharacters()) {
187 if (Node* previous = rangeStartContainer->childNode(rangeStartOffset - 1))
188 return previous;
189 }
190 for (Node* node = rangeStartContainer; node; node = parentCrossingShadowBoundaries(node)) {
191 if (Node* previous = node->previousSibling())
192 return previous;
193 }
194 return 0;
195 }
196
197 // --------
198
fullyClipsContents(Node * node)199 static inline bool fullyClipsContents(Node* node)
200 {
201 RenderObject* renderer = node->renderer();
202 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
203 return false;
204 return toRenderBox(renderer)->size().isEmpty();
205 }
206
ignoresContainerClip(Node * node)207 static inline bool ignoresContainerClip(Node* node)
208 {
209 RenderObject* renderer = node->renderer();
210 if (!renderer || renderer->isText())
211 return false;
212 EPosition position = renderer->style()->position();
213 return position == AbsolutePosition || position == FixedPosition;
214 }
215
pushFullyClippedState(BitStack & stack,Node * node)216 static void pushFullyClippedState(BitStack& stack, Node* node)
217 {
218 ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
219
220 // Push true if this node full clips its contents, or if a parent already has fully
221 // clipped and this is not a node that ignores its container's clip.
222 stack.push(fullyClipsContents(node) || stack.top() && !ignoresContainerClip(node));
223 }
224
setUpFullyClippedStack(BitStack & stack,Node * node)225 static void setUpFullyClippedStack(BitStack& stack, Node* node)
226 {
227 // Put the nodes in a vector so we can iterate in reverse order.
228 Vector<Node*, 100> ancestry;
229 for (Node* parent = parentCrossingShadowBoundaries(node); parent; parent = parentCrossingShadowBoundaries(parent))
230 ancestry.append(parent);
231
232 // Call pushFullyClippedState on each node starting with the earliest ancestor.
233 size_t size = ancestry.size();
234 for (size_t i = 0; i < size; ++i)
235 pushFullyClippedState(stack, ancestry[size - i - 1]);
236 pushFullyClippedState(stack, node);
237
238 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
239 }
240
241 // --------
242
TextIterator()243 TextIterator::TextIterator()
244 : m_startContainer(0)
245 , m_startOffset(0)
246 , m_endContainer(0)
247 , m_endOffset(0)
248 , m_positionNode(0)
249 , m_textCharacters(0)
250 , m_textLength(0)
251 , m_lastCharacter(0)
252 , m_emitCharactersBetweenAllVisiblePositions(false)
253 , m_enterTextControls(false)
254 {
255 }
256
TextIterator(const Range * r,bool emitCharactersBetweenAllVisiblePositions,bool enterTextControls)257 TextIterator::TextIterator(const Range* r, bool emitCharactersBetweenAllVisiblePositions, bool enterTextControls)
258 : m_startContainer(0)
259 , m_startOffset(0)
260 , m_endContainer(0)
261 , m_endOffset(0)
262 , m_positionNode(0)
263 , m_textCharacters(0)
264 , m_textLength(0)
265 , m_emitCharactersBetweenAllVisiblePositions(emitCharactersBetweenAllVisiblePositions)
266 , m_enterTextControls(enterTextControls)
267 {
268 if (!r)
269 return;
270
271 // get and validate the range endpoints
272 Node* startContainer = r->startContainer();
273 if (!startContainer)
274 return;
275 int startOffset = r->startOffset();
276 Node* endContainer = r->endContainer();
277 int endOffset = r->endOffset();
278
279 // Callers should be handing us well-formed ranges. If we discover that this isn't
280 // the case, we could consider changing this assertion to an early return.
281 ASSERT(r->boundaryPointsValid());
282
283 // remember range - this does not change
284 m_startContainer = startContainer;
285 m_startOffset = startOffset;
286 m_endContainer = endContainer;
287 m_endOffset = endOffset;
288
289 // set up the current node for processing
290 m_node = r->firstNode();
291 if (!m_node)
292 return;
293 setUpFullyClippedStack(m_fullyClippedStack, m_node);
294 m_offset = m_node == m_startContainer ? m_startOffset : 0;
295 m_handledNode = false;
296 m_handledChildren = false;
297
298 // calculate first out of bounds node
299 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
300
301 // initialize node processing state
302 m_needAnotherNewline = false;
303 m_textBox = 0;
304
305 // initialize record of previous node processing
306 m_haveEmitted = false;
307 m_lastTextNode = 0;
308 m_lastTextNodeEndedWithCollapsedSpace = false;
309 m_lastCharacter = 0;
310
311 #ifndef NDEBUG
312 // need this just because of the assert in advance()
313 m_positionNode = m_node;
314 #endif
315
316 // identify the first run
317 advance();
318 }
319
advance()320 void TextIterator::advance()
321 {
322 // reset the run information
323 m_positionNode = 0;
324 m_textLength = 0;
325
326 // handle remembered node that needed a newline after the text node's newline
327 if (m_needAnotherNewline) {
328 // Emit the extra newline, and position it *inside* m_node, after m_node's
329 // contents, in case it's a block, in the same way that we position the first
330 // newline. The range for the emitted newline should start where the line
331 // break begins.
332 // FIXME: It would be cleaner if we emitted two newlines during the last
333 // iteration, instead of using m_needAnotherNewline.
334 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
335 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
336 m_needAnotherNewline = false;
337 return;
338 }
339
340 // handle remembered text box
341 if (m_textBox) {
342 handleTextBox();
343 if (m_positionNode)
344 return;
345 }
346
347 while (m_node && m_node != m_pastEndNode) {
348 // if the range ends at offset 0 of an element, represent the
349 // position, but not the content, of that element e.g. if the
350 // node is a blockflow element, emit a newline that
351 // precedes the element
352 if (m_node == m_endContainer && m_endOffset == 0) {
353 representNodeOffsetZero();
354 m_node = 0;
355 return;
356 }
357
358 RenderObject* renderer = m_node->renderer();
359 if (!renderer) {
360 m_handledNode = true;
361 m_handledChildren = true;
362 } else {
363 // handle current node according to its type
364 if (!m_handledNode) {
365 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE?
366 m_handledNode = handleTextNode();
367 else if (renderer && (renderer->isImage() || renderer->isWidget() ||
368 (renderer->node() && renderer->node()->isElementNode() &&
369 static_cast<Element*>(renderer->node())->isFormControlElement())))
370 m_handledNode = handleReplacedElement();
371 else
372 m_handledNode = handleNonTextNode();
373 if (m_positionNode)
374 return;
375 }
376 }
377
378 // find a new current node to handle in depth-first manner,
379 // calling exitNode() as we come back thru a parent node
380 Node* next = m_handledChildren ? 0 : m_node->firstChild();
381 m_offset = 0;
382 if (!next) {
383 next = m_node->nextSibling();
384 if (!next) {
385 bool pastEnd = m_node->traverseNextNode() == m_pastEndNode;
386 Node* parentNode = parentCrossingShadowBoundaries(m_node);
387 while (!next && parentNode) {
388 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
389 return;
390 bool haveRenderer = m_node->renderer();
391 m_node = parentNode;
392 m_fullyClippedStack.pop();
393 parentNode = parentCrossingShadowBoundaries(m_node);
394 if (haveRenderer)
395 exitNode();
396 if (m_positionNode) {
397 m_handledNode = true;
398 m_handledChildren = true;
399 return;
400 }
401 next = m_node->nextSibling();
402 }
403 }
404 m_fullyClippedStack.pop();
405 }
406
407 // set the new current node
408 m_node = next;
409 if (m_node)
410 pushFullyClippedState(m_fullyClippedStack, m_node);
411 m_handledNode = false;
412 m_handledChildren = false;
413
414 // how would this ever be?
415 if (m_positionNode)
416 return;
417 }
418 }
419
compareBoxStart(const InlineTextBox * first,const InlineTextBox * second)420 static inline bool compareBoxStart(const InlineTextBox* first, const InlineTextBox* second)
421 {
422 return first->start() < second->start();
423 }
424
handleTextNode()425 bool TextIterator::handleTextNode()
426 {
427 if (m_fullyClippedStack.top())
428 return false;
429
430 RenderText* renderer = toRenderText(m_node->renderer());
431 if (renderer->style()->visibility() != VISIBLE)
432 return false;
433
434 m_lastTextNode = m_node;
435 String str = renderer->text();
436
437 // handle pre-formatted text
438 if (!renderer->style()->collapseWhiteSpace()) {
439 int runStart = m_offset;
440 if (m_lastTextNodeEndedWithCollapsedSpace) {
441 emitCharacter(' ', m_node, 0, runStart, runStart);
442 return false;
443 }
444 int strLength = str.length();
445 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
446 int runEnd = min(strLength, end);
447
448 if (runStart >= runEnd)
449 return true;
450
451 emitText(m_node, runStart, runEnd);
452 return true;
453 }
454
455 if (!renderer->firstTextBox() && str.length() > 0) {
456 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
457 return true;
458 }
459
460 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
461 if (renderer->containsReversedText()) {
462 m_sortedTextBoxes.clear();
463 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
464 m_sortedTextBoxes.append(textBox);
465 }
466 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), compareBoxStart);
467 m_sortedTextBoxesPosition = 0;
468 }
469
470 m_textBox = renderer->containsReversedText() ? m_sortedTextBoxes[0] : renderer->firstTextBox();
471 handleTextBox();
472 return true;
473 }
474
handleTextBox()475 void TextIterator::handleTextBox()
476 {
477 RenderText* renderer = toRenderText(m_node->renderer());
478 String str = renderer->text();
479 int start = m_offset;
480 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
481 while (m_textBox) {
482 int textBoxStart = m_textBox->start();
483 int runStart = max(textBoxStart, start);
484
485 // Check for collapsed space at the start of this run.
486 InlineTextBox* firstTextBox = renderer->containsReversedText() ? m_sortedTextBoxes[0] : renderer->firstTextBox();
487 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
488 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
489 if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
490 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
491 unsigned spaceRunStart = runStart - 1;
492 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
493 --spaceRunStart;
494 emitText(m_node, spaceRunStart, spaceRunStart + 1);
495 } else
496 emitCharacter(' ', m_node, 0, runStart, runStart);
497 return;
498 }
499 int textBoxEnd = textBoxStart + m_textBox->len();
500 int runEnd = min(textBoxEnd, end);
501
502 // Determine what the next text box will be, but don't advance yet
503 InlineTextBox* nextTextBox = 0;
504 if (renderer->containsReversedText()) {
505 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
506 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
507 } else
508 nextTextBox = m_textBox->nextTextBox();
509
510 if (runStart < runEnd) {
511 // Handle either a single newline character (which becomes a space),
512 // or a run of characters that does not include a newline.
513 // This effectively translates newlines to spaces without copying the text.
514 if (str[runStart] == '\n') {
515 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
516 m_offset = runStart + 1;
517 } else {
518 int subrunEnd = str.find('\n', runStart);
519 if (subrunEnd == -1 || subrunEnd > runEnd)
520 subrunEnd = runEnd;
521
522 m_offset = subrunEnd;
523 emitText(m_node, runStart, subrunEnd);
524 }
525
526 // If we are doing a subrun that doesn't go to the end of the text box,
527 // come back again to finish handling this text box; don't advance to the next one.
528 if (m_positionEndOffset < textBoxEnd)
529 return;
530
531 // Advance and return
532 int nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
533 if (nextRunStart > runEnd)
534 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
535 m_textBox = nextTextBox;
536 if (renderer->containsReversedText())
537 ++m_sortedTextBoxesPosition;
538 return;
539 }
540 // Advance and continue
541 m_textBox = nextTextBox;
542 if (renderer->containsReversedText())
543 ++m_sortedTextBoxesPosition;
544 }
545 }
546
handleReplacedElement()547 bool TextIterator::handleReplacedElement()
548 {
549 if (m_fullyClippedStack.top())
550 return false;
551
552 RenderObject* renderer = m_node->renderer();
553 if (renderer->style()->visibility() != VISIBLE)
554 return false;
555
556 if (m_lastTextNodeEndedWithCollapsedSpace) {
557 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
558 return false;
559 }
560
561 if (m_enterTextControls && renderer->isTextControl()) {
562 if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->innerTextElement()) {
563 m_node = innerTextElement->shadowTreeRootNode();
564 pushFullyClippedState(m_fullyClippedStack, m_node);
565 m_offset = 0;
566 return false;
567 }
568 }
569
570 m_haveEmitted = true;
571
572 if (m_emitCharactersBetweenAllVisiblePositions) {
573 // We want replaced elements to behave like punctuation for boundary
574 // finding, and to simply take up space for the selection preservation
575 // code in moveParagraphs, so we use a comma.
576 emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
577 return true;
578 }
579
580 m_positionNode = m_node->parentNode();
581 m_positionOffsetBaseNode = m_node;
582 m_positionStartOffset = 0;
583 m_positionEndOffset = 1;
584
585 m_textCharacters = 0;
586 m_textLength = 0;
587
588 m_lastCharacter = 0;
589
590 return true;
591 }
592
shouldEmitTabBeforeNode(Node * node)593 static bool shouldEmitTabBeforeNode(Node* node)
594 {
595 RenderObject* r = node->renderer();
596
597 // Table cells are delimited by tabs.
598 if (!r || !isTableCell(node))
599 return false;
600
601 // Want a tab before every cell other than the first one
602 RenderTableCell* rc = toRenderTableCell(r);
603 RenderTable* t = rc->table();
604 return t && (t->cellBefore(rc) || t->cellAbove(rc));
605 }
606
shouldEmitNewlineForNode(Node * node)607 static bool shouldEmitNewlineForNode(Node* node)
608 {
609 // br elements are represented by a single newline.
610 RenderObject* r = node->renderer();
611 if (!r)
612 return node->hasTagName(brTag);
613
614 return r->isBR();
615 }
616
shouldEmitNewlinesBeforeAndAfterNode(Node * node)617 static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node)
618 {
619 // Block flow (versus inline flow) is represented by having
620 // a newline both before and after the element.
621 RenderObject* r = node->renderer();
622 if (!r) {
623 return (node->hasTagName(blockquoteTag)
624 || node->hasTagName(ddTag)
625 || node->hasTagName(divTag)
626 || node->hasTagName(dlTag)
627 || node->hasTagName(dtTag)
628 || node->hasTagName(h1Tag)
629 || node->hasTagName(h2Tag)
630 || node->hasTagName(h3Tag)
631 || node->hasTagName(h4Tag)
632 || node->hasTagName(h5Tag)
633 || node->hasTagName(h6Tag)
634 || node->hasTagName(hrTag)
635 || node->hasTagName(liTag)
636 || node->hasTagName(listingTag)
637 || node->hasTagName(olTag)
638 || node->hasTagName(pTag)
639 || node->hasTagName(preTag)
640 || node->hasTagName(trTag)
641 || node->hasTagName(ulTag));
642 }
643
644 // Need to make an exception for table cells, because they are blocks, but we
645 // want them tab-delimited rather than having newlines before and after.
646 if (isTableCell(node))
647 return false;
648
649 // Need to make an exception for table row elements, because they are neither
650 // "inline" or "RenderBlock", but we want newlines for them.
651 if (r->isTableRow()) {
652 RenderTable* t = toRenderTableRow(r)->table();
653 if (t && !t->isInline())
654 return true;
655 }
656
657 return !r->isInline() && r->isRenderBlock() && !r->isFloatingOrPositioned() && !r->isBody();
658 }
659
shouldEmitNewlineAfterNode(Node * node)660 static bool shouldEmitNewlineAfterNode(Node* node)
661 {
662 // FIXME: It should be better but slower to create a VisiblePosition here.
663 if (!shouldEmitNewlinesBeforeAndAfterNode(node))
664 return false;
665 // Check if this is the very last renderer in the document.
666 // If so, then we should not emit a newline.
667 while ((node = node->traverseNextSibling()))
668 if (node->renderer())
669 return true;
670 return false;
671 }
672
shouldEmitNewlineBeforeNode(Node * node)673 static bool shouldEmitNewlineBeforeNode(Node* node)
674 {
675 return shouldEmitNewlinesBeforeAndAfterNode(node);
676 }
677
shouldEmitExtraNewlineForNode(Node * node)678 static bool shouldEmitExtraNewlineForNode(Node* node)
679 {
680 // When there is a significant collapsed bottom margin, emit an extra
681 // newline for a more realistic result. We end up getting the right
682 // result even without margin collapsing. For example: <div><p>text</p></div>
683 // will work right even if both the <div> and the <p> have bottom margins.
684 RenderObject* r = node->renderer();
685 if (!r || !r->isBox())
686 return false;
687
688 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
689 // not to do this at all
690 if (node->hasTagName(h1Tag)
691 || node->hasTagName(h2Tag)
692 || node->hasTagName(h3Tag)
693 || node->hasTagName(h4Tag)
694 || node->hasTagName(h5Tag)
695 || node->hasTagName(h6Tag)
696 || node->hasTagName(pTag)) {
697 RenderStyle* style = r->style();
698 if (style) {
699 int bottomMargin = toRenderBox(r)->collapsedMarginBottom();
700 int fontSize = style->fontDescription().computedPixelSize();
701 if (bottomMargin * 2 >= fontSize)
702 return true;
703 }
704 }
705
706 return false;
707 }
708
709 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
shouldRepresentNodeOffsetZero()710 bool TextIterator::shouldRepresentNodeOffsetZero()
711 {
712 if (m_emitCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable())
713 return true;
714
715 // Leave element positioned flush with start of a paragraph
716 // (e.g. do not insert tab before a table cell at the start of a paragraph)
717 if (m_lastCharacter == '\n')
718 return false;
719
720 // Otherwise, show the position if we have emitted any characters
721 if (m_haveEmitted)
722 return true;
723
724 // We've not emitted anything yet. Generally, there is no need for any positioning then.
725 // The only exception is when the element is visually not in the same line as
726 // the start of the range (e.g. the range starts at the end of the previous paragraph).
727 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
728 // make quicker checks to possibly avoid that. Another check that we could make is
729 // is whether the inline vs block flow changed since the previous visible element.
730 // I think we're already in a special enough case that that won't be needed, tho.
731
732 // No character needed if this is the first node in the range.
733 if (m_node == m_startContainer)
734 return false;
735
736 // If we are outside the start container's subtree, assume we need to emit.
737 // FIXME: m_startContainer could be an inline block
738 if (!m_node->isDescendantOf(m_startContainer))
739 return true;
740
741 // If we started as m_startContainer offset 0 and the current node is a descendant of
742 // the start container, we already had enough context to correctly decide whether to
743 // emit after a preceding block. We chose not to emit (m_haveEmitted is false),
744 // so don't second guess that now.
745 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
746 // immaterial since we likely would have already emitted something by now.
747 if (m_startOffset == 0)
748 return false;
749
750 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
751 // Additionally, if the range we are iterating over contains huge sections of unrendered content,
752 // we would create VisiblePositions on every call to this function without this check.
753 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE)
754 return false;
755
756 // The startPos.isNotNull() check is needed because the start could be before the body,
757 // and in that case we'll get null. We don't want to put in newlines at the start in that case.
758 // The currPos.isNotNull() check is needed because positions in non-HTML content
759 // (like SVG) do not have visible positions, and we don't want to emit for them either.
760 VisiblePosition startPos = VisiblePosition(m_startContainer, m_startOffset, DOWNSTREAM);
761 VisiblePosition currPos = VisiblePosition(m_node, 0, DOWNSTREAM);
762 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
763 }
764
shouldEmitSpaceBeforeAndAfterNode(Node * node)765 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
766 {
767 return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitCharactersBetweenAllVisiblePositions);
768 }
769
representNodeOffsetZero()770 void TextIterator::representNodeOffsetZero()
771 {
772 // Emit a character to show the positioning of m_node.
773
774 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
775 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks
776 // on m_node to see if it necessitates emitting a character first and will early return
777 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
778 if (shouldEmitTabBeforeNode(m_node)) {
779 if (shouldRepresentNodeOffsetZero())
780 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
781 } else if (shouldEmitNewlineBeforeNode(m_node)) {
782 if (shouldRepresentNodeOffsetZero())
783 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
784 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
785 if (shouldRepresentNodeOffsetZero())
786 emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
787 }
788 }
789
handleNonTextNode()790 bool TextIterator::handleNonTextNode()
791 {
792 if (shouldEmitNewlineForNode(m_node))
793 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
794 else if (m_emitCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
795 emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
796 else
797 representNodeOffsetZero();
798
799 return true;
800 }
801
exitNode()802 void TextIterator::exitNode()
803 {
804 // prevent emitting a newline when exiting a collapsed block at beginning of the range
805 // FIXME: !m_haveEmitted does not necessarily mean there was a collapsed block... it could
806 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
807 // therefore look like a blank line.
808 if (!m_haveEmitted)
809 return;
810
811 // Emit with a position *inside* m_node, after m_node's contents, in
812 // case it is a block, because the run should start where the
813 // emitted character is positioned visually.
814 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
815 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
816 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use
817 // TextIterator in _web_attributedStringFromRange.
818 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
819 if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) {
820 // use extra newline to represent margin bottom, as needed
821 bool addNewline = shouldEmitExtraNewlineForNode(m_node);
822
823 // FIXME: We need to emit a '\n' as we leave an empty block(s) that
824 // contain a VisiblePosition when doing selection preservation.
825 if (m_lastCharacter != '\n') {
826 // insert a newline with a position following this block's contents.
827 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
828 // remember whether to later add a newline for the current node
829 ASSERT(!m_needAnotherNewline);
830 m_needAnotherNewline = addNewline;
831 } else if (addNewline)
832 // insert a newline with a position following this block's contents.
833 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
834 }
835
836 // If nothing was emitted, see if we need to emit a space.
837 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
838 emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
839 }
840
emitCharacter(UChar c,Node * textNode,Node * offsetBaseNode,int textStartOffset,int textEndOffset)841 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
842 {
843 m_haveEmitted = true;
844
845 // remember information with which to construct the TextIterator::range()
846 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
847 m_positionNode = textNode;
848 m_positionOffsetBaseNode = offsetBaseNode;
849 m_positionStartOffset = textStartOffset;
850 m_positionEndOffset = textEndOffset;
851
852 // remember information with which to construct the TextIterator::characters() and length()
853 m_singleCharacterBuffer = c;
854 m_textCharacters = &m_singleCharacterBuffer;
855 m_textLength = 1;
856
857 // remember some iteration state
858 m_lastTextNodeEndedWithCollapsedSpace = false;
859 m_lastCharacter = c;
860 }
861
emitText(Node * textNode,int textStartOffset,int textEndOffset)862 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
863 {
864 RenderText* renderer = toRenderText(m_node->renderer());
865 String str = renderer->text();
866 ASSERT(str.characters());
867
868 m_positionNode = textNode;
869 m_positionOffsetBaseNode = 0;
870 m_positionStartOffset = textStartOffset;
871 m_positionEndOffset = textEndOffset;
872 m_textCharacters = str.characters() + textStartOffset;
873 m_textLength = textEndOffset - textStartOffset;
874 m_lastCharacter = str[textEndOffset - 1];
875
876 m_lastTextNodeEndedWithCollapsedSpace = false;
877 m_haveEmitted = true;
878 }
879
range() const880 PassRefPtr<Range> TextIterator::range() const
881 {
882 // use the current run information, if we have it
883 if (m_positionNode) {
884 if (m_positionOffsetBaseNode) {
885 int index = m_positionOffsetBaseNode->nodeIndex();
886 m_positionStartOffset += index;
887 m_positionEndOffset += index;
888 m_positionOffsetBaseNode = 0;
889 }
890 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
891 }
892
893 // otherwise, return the end of the overall range we were given
894 if (m_endContainer)
895 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
896
897 return 0;
898 }
899
node() const900 Node* TextIterator::node() const
901 {
902 RefPtr<Range> textRange = range();
903 if (!textRange)
904 return 0;
905
906 Node* node = textRange->startContainer();
907 if (!node)
908 return 0;
909 if (node->offsetInCharacters())
910 return node;
911
912 return node->childNode(textRange->startOffset());
913 }
914
915 // --------
916
SimplifiedBackwardsTextIterator()917 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator()
918 : m_positionNode(0)
919 {
920 }
921
SimplifiedBackwardsTextIterator(const Range * r)922 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r)
923 : m_positionNode(0)
924 {
925 if (!r)
926 return;
927
928 Node* startNode = r->startContainer();
929 if (!startNode)
930 return;
931 Node* endNode = r->endContainer();
932 int startOffset = r->startOffset();
933 int endOffset = r->endOffset();
934
935 if (!startNode->offsetInCharacters()) {
936 if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
937 startNode = startNode->childNode(startOffset);
938 startOffset = 0;
939 }
940 }
941 if (!endNode->offsetInCharacters()) {
942 if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
943 endNode = endNode->childNode(endOffset - 1);
944 endOffset = endNode->offsetInCharacters() ? endNode->maxCharacterOffset() : endNode->childNodeCount();
945 }
946 }
947
948 m_node = endNode;
949 setUpFullyClippedStack(m_fullyClippedStack, m_node);
950 m_offset = endOffset;
951 m_handledNode = false;
952 m_handledChildren = endOffset == 0;
953
954 m_startNode = startNode;
955 m_startOffset = startOffset;
956 m_endNode = endNode;
957 m_endOffset = endOffset;
958
959 #ifndef NDEBUG
960 // Need this just because of the assert.
961 m_positionNode = endNode;
962 #endif
963
964 m_lastTextNode = 0;
965 m_lastCharacter = '\n';
966
967 m_pastStartNode = previousInPostOrderCrossingShadowBoundaries(startNode, startOffset);
968
969 advance();
970 }
971
advance()972 void SimplifiedBackwardsTextIterator::advance()
973 {
974 ASSERT(m_positionNode);
975
976 m_positionNode = 0;
977 m_textLength = 0;
978
979 while (m_node && m_node != m_pastStartNode) {
980 // Don't handle node if we start iterating at [node, 0].
981 if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) {
982 RenderObject* renderer = m_node->renderer();
983 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
984 // FIXME: What about CDATA_SECTION_NODE?
985 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
986 m_handledNode = handleTextNode();
987 } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
988 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
989 m_handledNode = handleReplacedElement();
990 } else
991 m_handledNode = handleNonTextNode();
992 if (m_positionNode)
993 return;
994 }
995
996 Node* next = m_handledChildren ? 0 : m_node->lastChild();
997 if (!next) {
998 // Exit empty containers as we pass over them or containers
999 // where [container, 0] is where we started iterating.
1000 if (!m_handledNode &&
1001 canHaveChildrenForEditing(m_node) &&
1002 m_node->parentNode() &&
1003 (!m_node->lastChild() || (m_node == m_endNode && m_endOffset == 0))) {
1004 exitNode();
1005 if (m_positionNode) {
1006 m_handledNode = true;
1007 m_handledChildren = true;
1008 return;
1009 }
1010 }
1011 // Exit all other containers.
1012 next = m_node->previousSibling();
1013 while (!next) {
1014 Node* parentNode = parentCrossingShadowBoundaries(m_node);
1015 if (!parentNode)
1016 break;
1017 m_node = parentNode;
1018 m_fullyClippedStack.pop();
1019 exitNode();
1020 if (m_positionNode) {
1021 m_handledNode = true;
1022 m_handledChildren = true;
1023 return;
1024 }
1025 next = m_node->previousSibling();
1026 }
1027 m_fullyClippedStack.pop();
1028 }
1029
1030 m_node = next;
1031 if (m_node)
1032 pushFullyClippedState(m_fullyClippedStack, m_node);
1033 m_offset = m_node ? caretMaxOffset(m_node) : 0;
1034 m_handledNode = false;
1035 m_handledChildren = false;
1036
1037 if (m_positionNode)
1038 return;
1039 }
1040 }
1041
handleTextNode()1042 bool SimplifiedBackwardsTextIterator::handleTextNode()
1043 {
1044 m_lastTextNode = m_node;
1045
1046 RenderText* renderer = toRenderText(m_node->renderer());
1047 String str = renderer->text();
1048
1049 if (!renderer->firstTextBox() && str.length() > 0)
1050 return true;
1051
1052 m_positionEndOffset = m_offset;
1053
1054 m_offset = (m_node == m_startNode) ? m_startOffset : 0;
1055 m_positionNode = m_node;
1056 m_positionStartOffset = m_offset;
1057 m_textLength = m_positionEndOffset - m_positionStartOffset;
1058 m_textCharacters = str.characters() + m_positionStartOffset;
1059
1060 m_lastCharacter = str[m_positionEndOffset - 1];
1061
1062 return true;
1063 }
1064
handleReplacedElement()1065 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
1066 {
1067 unsigned index = m_node->nodeIndex();
1068 // We want replaced elements to behave like punctuation for boundary
1069 // finding, and to simply take up space for the selection preservation
1070 // code in moveParagraphs, so we use a comma. Unconditionally emit
1071 // here because this iterator is only used for boundary finding.
1072 emitCharacter(',', m_node->parentNode(), index, index + 1);
1073 return true;
1074 }
1075
handleNonTextNode()1076 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
1077 {
1078 // We can use a linefeed in place of a tab because this simple iterator is only used to
1079 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
1080 if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
1081 unsigned index = m_node->nodeIndex();
1082 // The start of this emitted range is wrong. Ensuring correctness would require
1083 // VisiblePositions and so would be slow. previousBoundary expects this.
1084 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
1085 }
1086 return true;
1087 }
1088
exitNode()1089 void SimplifiedBackwardsTextIterator::exitNode()
1090 {
1091 if (shouldEmitNewlineForNode(m_node) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
1092 // The start of this emitted range is wrong. Ensuring correctness would require
1093 // VisiblePositions and so would be slow. previousBoundary expects this.
1094 emitCharacter('\n', m_node, 0, 0);
1095 }
1096 }
1097
emitCharacter(UChar c,Node * node,int startOffset,int endOffset)1098 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
1099 {
1100 m_singleCharacterBuffer = c;
1101 m_positionNode = node;
1102 m_positionStartOffset = startOffset;
1103 m_positionEndOffset = endOffset;
1104 m_textCharacters = &m_singleCharacterBuffer;
1105 m_textLength = 1;
1106 m_lastCharacter = c;
1107 }
1108
range() const1109 PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
1110 {
1111 if (m_positionNode)
1112 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1113
1114 return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
1115 }
1116
1117 // --------
1118
CharacterIterator()1119 CharacterIterator::CharacterIterator()
1120 : m_offset(0)
1121 , m_runOffset(0)
1122 , m_atBreak(true)
1123 {
1124 }
1125
CharacterIterator(const Range * r,bool emitCharactersBetweenAllVisiblePositions,bool enterTextControls)1126 CharacterIterator::CharacterIterator(const Range* r, bool emitCharactersBetweenAllVisiblePositions, bool enterTextControls)
1127 : m_offset(0)
1128 , m_runOffset(0)
1129 , m_atBreak(true)
1130 , m_textIterator(r, emitCharactersBetweenAllVisiblePositions, enterTextControls)
1131 {
1132 while (!atEnd() && m_textIterator.length() == 0)
1133 m_textIterator.advance();
1134 }
1135
range() const1136 PassRefPtr<Range> CharacterIterator::range() const
1137 {
1138 RefPtr<Range> r = m_textIterator.range();
1139 if (!m_textIterator.atEnd()) {
1140 if (m_textIterator.length() <= 1) {
1141 ASSERT(m_runOffset == 0);
1142 } else {
1143 Node* n = r->startContainer();
1144 ASSERT(n == r->endContainer());
1145 int offset = r->startOffset() + m_runOffset;
1146 ExceptionCode ec = 0;
1147 r->setStart(n, offset, ec);
1148 r->setEnd(n, offset + 1, ec);
1149 ASSERT(!ec);
1150 }
1151 }
1152 return r.release();
1153 }
1154
advance(int count)1155 void CharacterIterator::advance(int count)
1156 {
1157 if (count <= 0) {
1158 ASSERT(count == 0);
1159 return;
1160 }
1161
1162 m_atBreak = false;
1163
1164 // easy if there is enough left in the current m_textIterator run
1165 int remaining = m_textIterator.length() - m_runOffset;
1166 if (count < remaining) {
1167 m_runOffset += count;
1168 m_offset += count;
1169 return;
1170 }
1171
1172 // exhaust the current m_textIterator run
1173 count -= remaining;
1174 m_offset += remaining;
1175
1176 // move to a subsequent m_textIterator run
1177 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1178 int runLength = m_textIterator.length();
1179 if (runLength == 0)
1180 m_atBreak = true;
1181 else {
1182 // see whether this is m_textIterator to use
1183 if (count < runLength) {
1184 m_runOffset = count;
1185 m_offset += count;
1186 return;
1187 }
1188
1189 // exhaust this m_textIterator run
1190 count -= runLength;
1191 m_offset += runLength;
1192 }
1193 }
1194
1195 // ran to the end of the m_textIterator... no more runs left
1196 m_atBreak = true;
1197 m_runOffset = 0;
1198 }
1199
string(int numChars)1200 String CharacterIterator::string(int numChars)
1201 {
1202 Vector<UChar> result;
1203 result.reserveInitialCapacity(numChars);
1204 while (numChars > 0 && !atEnd()) {
1205 int runSize = min(numChars, length());
1206 result.append(characters(), runSize);
1207 numChars -= runSize;
1208 advance(runSize);
1209 }
1210 return String::adopt(result);
1211 }
1212
characterSubrange(CharacterIterator & it,int offset,int length)1213 static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
1214 {
1215 it.advance(offset);
1216 RefPtr<Range> start = it.range();
1217
1218 if (length > 1)
1219 it.advance(length - 1);
1220 RefPtr<Range> end = it.range();
1221
1222 return Range::create(start->startContainer()->document(),
1223 start->startContainer(), start->startOffset(),
1224 end->endContainer(), end->endOffset());
1225 }
1226
BackwardsCharacterIterator()1227 BackwardsCharacterIterator::BackwardsCharacterIterator()
1228 : m_offset(0)
1229 , m_runOffset(0)
1230 , m_atBreak(true)
1231 {
1232 }
1233
BackwardsCharacterIterator(const Range * range)1234 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range)
1235 : m_offset(0)
1236 , m_runOffset(0)
1237 , m_atBreak(true)
1238 , m_textIterator(range)
1239 {
1240 while (!atEnd() && !m_textIterator.length())
1241 m_textIterator.advance();
1242 }
1243
range() const1244 PassRefPtr<Range> BackwardsCharacterIterator::range() const
1245 {
1246 RefPtr<Range> r = m_textIterator.range();
1247 if (!m_textIterator.atEnd()) {
1248 if (m_textIterator.length() <= 1)
1249 ASSERT(m_runOffset == 0);
1250 else {
1251 Node* n = r->startContainer();
1252 ASSERT(n == r->endContainer());
1253 int offset = r->endOffset() - m_runOffset;
1254 ExceptionCode ec = 0;
1255 r->setStart(n, offset - 1, ec);
1256 r->setEnd(n, offset, ec);
1257 ASSERT(!ec);
1258 }
1259 }
1260 return r.release();
1261 }
1262
advance(int count)1263 void BackwardsCharacterIterator::advance(int count)
1264 {
1265 if (count <= 0) {
1266 ASSERT(!count);
1267 return;
1268 }
1269
1270 m_atBreak = false;
1271
1272 int remaining = m_textIterator.length() - m_runOffset;
1273 if (count < remaining) {
1274 m_runOffset += count;
1275 m_offset += count;
1276 return;
1277 }
1278
1279 count -= remaining;
1280 m_offset += remaining;
1281
1282 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1283 int runLength = m_textIterator.length();
1284 if (runLength == 0)
1285 m_atBreak = true;
1286 else {
1287 if (count < runLength) {
1288 m_runOffset = count;
1289 m_offset += count;
1290 return;
1291 }
1292
1293 count -= runLength;
1294 m_offset += runLength;
1295 }
1296 }
1297
1298 m_atBreak = true;
1299 m_runOffset = 0;
1300 }
1301
1302 // --------
1303
WordAwareIterator()1304 WordAwareIterator::WordAwareIterator()
1305 : m_previousText(0)
1306 , m_didLookAhead(false)
1307 {
1308 }
1309
WordAwareIterator(const Range * r)1310 WordAwareIterator::WordAwareIterator(const Range* r)
1311 : m_previousText(0)
1312 , m_didLookAhead(true) // so we consider the first chunk from the text iterator
1313 , m_textIterator(r)
1314 {
1315 advance(); // get in position over the first chunk of text
1316 }
1317
1318 // We're always in one of these modes:
1319 // - The current chunk in the text iterator is our current chunk
1320 // (typically its a piece of whitespace, or text that ended with whitespace)
1321 // - The previous chunk in the text iterator is our current chunk
1322 // (we looked ahead to the next chunk and found a word boundary)
1323 // - We built up our own chunk of text from many chunks from the text iterator
1324
1325 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
1326
advance()1327 void WordAwareIterator::advance()
1328 {
1329 m_previousText = 0;
1330 m_buffer.clear(); // toss any old buffer we built up
1331
1332 // If last time we did a look-ahead, start with that looked-ahead chunk now
1333 if (!m_didLookAhead) {
1334 ASSERT(!m_textIterator.atEnd());
1335 m_textIterator.advance();
1336 }
1337 m_didLookAhead = false;
1338
1339 // Go to next non-empty chunk
1340 while (!m_textIterator.atEnd() && m_textIterator.length() == 0)
1341 m_textIterator.advance();
1342 m_range = m_textIterator.range();
1343
1344 if (m_textIterator.atEnd())
1345 return;
1346
1347 while (1) {
1348 // If this chunk ends in whitespace we can just use it as our chunk.
1349 if (isSpaceOrNewline(m_textIterator.characters()[m_textIterator.length() - 1]))
1350 return;
1351
1352 // If this is the first chunk that failed, save it in previousText before look ahead
1353 if (m_buffer.isEmpty()) {
1354 m_previousText = m_textIterator.characters();
1355 m_previousLength = m_textIterator.length();
1356 }
1357
1358 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
1359 m_textIterator.advance();
1360 if (m_textIterator.atEnd() || m_textIterator.length() == 0 || isSpaceOrNewline(m_textIterator.characters()[0])) {
1361 m_didLookAhead = true;
1362 return;
1363 }
1364
1365 if (m_buffer.isEmpty()) {
1366 // Start gobbling chunks until we get to a suitable stopping point
1367 m_buffer.append(m_previousText, m_previousLength);
1368 m_previousText = 0;
1369 }
1370 m_buffer.append(m_textIterator.characters(), m_textIterator.length());
1371 int exception = 0;
1372 m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception);
1373 }
1374 }
1375
length() const1376 int WordAwareIterator::length() const
1377 {
1378 if (!m_buffer.isEmpty())
1379 return m_buffer.size();
1380 if (m_previousText)
1381 return m_previousLength;
1382 return m_textIterator.length();
1383 }
1384
characters() const1385 const UChar* WordAwareIterator::characters() const
1386 {
1387 if (!m_buffer.isEmpty())
1388 return m_buffer.data();
1389 if (m_previousText)
1390 return m_previousText;
1391 return m_textIterator.characters();
1392 }
1393
1394 // --------
1395
foldQuoteMark(UChar c)1396 static inline UChar foldQuoteMark(UChar c)
1397 {
1398 switch (c) {
1399 case hebrewPunctuationGershayim:
1400 case leftDoubleQuotationMark:
1401 case rightDoubleQuotationMark:
1402 return '"';
1403 case hebrewPunctuationGeresh:
1404 case leftSingleQuotationMark:
1405 case rightSingleQuotationMark:
1406 return '\'';
1407 default:
1408 return c;
1409 }
1410 }
1411
foldQuoteMarks(String & s)1412 static inline void foldQuoteMarks(String& s)
1413 {
1414 s.replace(hebrewPunctuationGeresh, '\'');
1415 s.replace(hebrewPunctuationGershayim, '"');
1416 s.replace(leftDoubleQuotationMark, '"');
1417 s.replace(leftSingleQuotationMark, '\'');
1418 s.replace(rightDoubleQuotationMark, '"');
1419 s.replace(rightSingleQuotationMark, '\'');
1420 }
1421
1422 #if USE(ICU_UNICODE) && !UCONFIG_NO_COLLATION
1423
foldQuoteMarks(UChar * data,size_t length)1424 static inline void foldQuoteMarks(UChar* data, size_t length)
1425 {
1426 for (size_t i = 0; i < length; ++i)
1427 data[i] = foldQuoteMark(data[i]);
1428 }
1429
1430 static const size_t minimumSearchBufferSize = 8192;
1431
1432 #ifndef NDEBUG
1433 static bool searcherInUse;
1434 #endif
1435
createSearcher()1436 static UStringSearch* createSearcher()
1437 {
1438 // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
1439 // but it doesn't matter exactly what it is, since we don't perform any searches
1440 // without setting both the pattern and the text.
1441 UErrorCode status = U_ZERO_ERROR;
1442 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, currentSearchLocaleID(), 0, &status);
1443 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1444 return searcher;
1445 }
1446
searcher()1447 static UStringSearch* searcher()
1448 {
1449 static UStringSearch* searcher = createSearcher();
1450 return searcher;
1451 }
1452
lockSearcher()1453 static inline void lockSearcher()
1454 {
1455 #ifndef NDEBUG
1456 ASSERT(!searcherInUse);
1457 searcherInUse = true;
1458 #endif
1459 }
1460
unlockSearcher()1461 static inline void unlockSearcher()
1462 {
1463 #ifndef NDEBUG
1464 ASSERT(searcherInUse);
1465 searcherInUse = false;
1466 #endif
1467 }
1468
SearchBuffer(const String & target,bool isCaseSensitive)1469 inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
1470 : m_target(target)
1471 , m_atBreak(true)
1472 {
1473 ASSERT(!m_target.isEmpty());
1474
1475 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
1476 // of doing it in a separate replacement pass here, but ICU doesn't offer a way
1477 // to add tailoring on top of the locale-specific tailoring as of this writing.
1478 foldQuoteMarks(m_target);
1479
1480 size_t targetLength = m_target.length();
1481 m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize));
1482 m_overlap = m_buffer.capacity() / 4;
1483
1484 // Grab the single global searcher.
1485 // If we ever have a reason to do more than once search buffer at once, we'll have
1486 // to move to multiple searchers.
1487 lockSearcher();
1488
1489 UStringSearch* searcher = WebCore::searcher();
1490 UCollator* collator = usearch_getCollator(searcher);
1491
1492 UCollationStrength strength = isCaseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY;
1493 if (ucol_getStrength(collator) != strength) {
1494 ucol_setStrength(collator, strength);
1495 usearch_reset(searcher);
1496 }
1497
1498 UErrorCode status = U_ZERO_ERROR;
1499 usearch_setPattern(searcher, m_target.characters(), targetLength, &status);
1500 ASSERT(status == U_ZERO_ERROR);
1501 }
1502
~SearchBuffer()1503 inline SearchBuffer::~SearchBuffer()
1504 {
1505 unlockSearcher();
1506 }
1507
append(const UChar * characters,size_t length)1508 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
1509 {
1510 ASSERT(length);
1511
1512 if (m_atBreak) {
1513 m_buffer.shrink(0);
1514 m_atBreak = false;
1515 } else if (m_buffer.size() == m_buffer.capacity()) {
1516 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
1517 m_buffer.shrink(m_overlap);
1518 }
1519
1520 size_t oldLength = m_buffer.size();
1521 size_t usableLength = min(m_buffer.capacity() - oldLength, length);
1522 ASSERT(usableLength);
1523 m_buffer.append(characters, usableLength);
1524 foldQuoteMarks(m_buffer.data() + oldLength, usableLength);
1525 return usableLength;
1526 }
1527
atBreak() const1528 inline bool SearchBuffer::atBreak() const
1529 {
1530 return m_atBreak;
1531 }
1532
reachedBreak()1533 inline void SearchBuffer::reachedBreak()
1534 {
1535 m_atBreak = true;
1536 }
1537
search(size_t & start)1538 inline size_t SearchBuffer::search(size_t& start)
1539 {
1540 size_t size = m_buffer.size();
1541 if (m_atBreak) {
1542 if (!size)
1543 return 0;
1544 } else {
1545 if (size != m_buffer.capacity())
1546 return 0;
1547 }
1548
1549 UStringSearch* searcher = WebCore::searcher();
1550
1551 UErrorCode status = U_ZERO_ERROR;
1552 usearch_setText(searcher, m_buffer.data(), size, &status);
1553 ASSERT(status == U_ZERO_ERROR);
1554
1555 int matchStart = usearch_first(searcher, &status);
1556 ASSERT(status == U_ZERO_ERROR);
1557 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
1558 ASSERT(matchStart == USEARCH_DONE);
1559 return 0;
1560 }
1561
1562 // Matches that start in the overlap area are only tentative.
1563 // The same match may appear later, matching more characters,
1564 // possibly including a combining character that's not yet in the buffer.
1565 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
1566 memcpy(m_buffer.data(), m_buffer.data() + size - m_overlap, m_overlap * sizeof(UChar));
1567 m_buffer.shrink(m_overlap);
1568 return 0;
1569 }
1570
1571 size_t newSize = size - (matchStart + 1);
1572 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
1573 m_buffer.shrink(newSize);
1574
1575 start = size - matchStart;
1576 return usearch_getMatchedLength(searcher);
1577 }
1578
1579 #else // !ICU_UNICODE
1580
SearchBuffer(const String & target,bool isCaseSensitive)1581 inline SearchBuffer::SearchBuffer(const String& target, bool isCaseSensitive)
1582 : m_target(isCaseSensitive ? target : target.foldCase())
1583 , m_isCaseSensitive(isCaseSensitive)
1584 , m_buffer(m_target.length())
1585 , m_isCharacterStartBuffer(m_target.length())
1586 , m_isBufferFull(false)
1587 , m_cursor(0)
1588 {
1589 ASSERT(!m_target.isEmpty());
1590 m_target.replace(noBreakSpace, ' ');
1591 foldQuoteMarks(m_target);
1592 }
1593
~SearchBuffer()1594 inline SearchBuffer::~SearchBuffer()
1595 {
1596 }
1597
reachedBreak()1598 inline void SearchBuffer::reachedBreak()
1599 {
1600 m_cursor = 0;
1601 m_isBufferFull = false;
1602 }
1603
atBreak() const1604 inline bool SearchBuffer::atBreak() const
1605 {
1606 return !m_cursor && !m_isBufferFull;
1607 }
1608
append(UChar c,bool isStart)1609 inline void SearchBuffer::append(UChar c, bool isStart)
1610 {
1611 m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMark(c);
1612 m_isCharacterStartBuffer[m_cursor] = isStart;
1613 if (++m_cursor == m_target.length()) {
1614 m_cursor = 0;
1615 m_isBufferFull = true;
1616 }
1617 }
1618
append(const UChar * characters,size_t length)1619 inline size_t SearchBuffer::append(const UChar* characters, size_t length)
1620 {
1621 ASSERT(length);
1622 if (m_isCaseSensitive) {
1623 append(characters[0], true);
1624 return 1;
1625 }
1626 const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough
1627 UChar foldedCharacters[maxFoldedCharacters];
1628 bool error;
1629 int numFoldedCharacters = foldCase(foldedCharacters, maxFoldedCharacters, characters, 1, &error);
1630 ASSERT(!error);
1631 ASSERT(numFoldedCharacters);
1632 ASSERT(numFoldedCharacters <= maxFoldedCharacters);
1633 if (!error && numFoldedCharacters) {
1634 numFoldedCharacters = min(numFoldedCharacters, maxFoldedCharacters);
1635 append(foldedCharacters[0], true);
1636 for (int i = 1; i < numFoldedCharacters; ++i)
1637 append(foldedCharacters[i], false);
1638 }
1639 return 1;
1640 }
1641
search(size_t & start)1642 inline size_t SearchBuffer::search(size_t& start)
1643 {
1644 if (!m_isBufferFull)
1645 return 0;
1646 if (!m_isCharacterStartBuffer[m_cursor])
1647 return 0;
1648
1649 size_t tailSpace = m_target.length() - m_cursor;
1650 if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0)
1651 return 0;
1652 if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0)
1653 return 0;
1654
1655 start = length();
1656
1657 // Now that we've found a match once, we don't want to find it again, because those
1658 // are the SearchBuffer semantics, allowing for a buffer where you append more than one
1659 // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if
1660 // we want to get rid of that in the future we could track this with a separate boolean
1661 // or even move the characters to the start of the buffer and set m_isBufferFull to false.
1662 m_isCharacterStartBuffer[m_cursor] = false;
1663
1664 return start;
1665 }
1666
1667 // Returns the number of characters that were appended to the buffer (what we are searching in).
1668 // That's not necessarily the same length as the passed-in target string, because case folding
1669 // can make two strings match even though they're not the same length.
length() const1670 size_t SearchBuffer::length() const
1671 {
1672 size_t bufferSize = m_target.length();
1673 size_t length = 0;
1674 for (size_t i = 0; i < bufferSize; ++i)
1675 length += m_isCharacterStartBuffer[i];
1676 return length;
1677 }
1678
1679 #endif // !ICU_UNICODE
1680
1681 // --------
1682
rangeLength(const Range * r,bool forSelectionPreservation)1683 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
1684 {
1685 int length = 0;
1686 for (TextIterator it(r, forSelectionPreservation); !it.atEnd(); it.advance())
1687 length += it.length();
1688
1689 return length;
1690 }
1691
subrange(Range * entireRange,int characterOffset,int characterCount)1692 PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
1693 {
1694 CharacterIterator entireRangeIterator(entireRange);
1695 return characterSubrange(entireRangeIterator, characterOffset, characterCount);
1696 }
1697
rangeFromLocationAndLength(Element * scope,int rangeLocation,int rangeLength,bool forSelectionPreservation)1698 PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(Element* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
1699 {
1700 RefPtr<Range> resultRange = scope->document()->createRange();
1701
1702 int docTextPosition = 0;
1703 int rangeEnd = rangeLocation + rangeLength;
1704 bool startRangeFound = false;
1705
1706 RefPtr<Range> textRunRange;
1707
1708 TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation);
1709
1710 // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
1711 if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
1712 textRunRange = it.range();
1713
1714 ExceptionCode ec = 0;
1715 resultRange->setStart(textRunRange->startContainer(), 0, ec);
1716 ASSERT(!ec);
1717 resultRange->setEnd(textRunRange->startContainer(), 0, ec);
1718 ASSERT(!ec);
1719
1720 return resultRange.release();
1721 }
1722
1723 for (; !it.atEnd(); it.advance()) {
1724 int len = it.length();
1725 textRunRange = it.range();
1726
1727 bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len;
1728 bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len;
1729
1730 // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only
1731 // in those cases that textRunRange is used.
1732 if (foundStart || foundEnd) {
1733 // FIXME: This is a workaround for the fact that the end of a run is often at the wrong
1734 // position for emitted '\n's.
1735 if (len == 1 && it.characters()[0] == '\n') {
1736 Position runStart = textRunRange->startPosition();
1737 Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
1738 if (runEnd.isNotNull()) {
1739 ExceptionCode ec = 0;
1740 textRunRange->setEnd(runEnd.node(), runEnd.deprecatedEditingOffset(), ec);
1741 ASSERT(!ec);
1742 }
1743 }
1744 }
1745
1746 if (foundStart) {
1747 startRangeFound = true;
1748 int exception = 0;
1749 if (textRunRange->startContainer()->isTextNode()) {
1750 int offset = rangeLocation - docTextPosition;
1751 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
1752 } else {
1753 if (rangeLocation == docTextPosition)
1754 resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception);
1755 else
1756 resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception);
1757 }
1758 }
1759
1760 if (foundEnd) {
1761 int exception = 0;
1762 if (textRunRange->startContainer()->isTextNode()) {
1763 int offset = rangeEnd - docTextPosition;
1764 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception);
1765 } else {
1766 if (rangeEnd == docTextPosition)
1767 resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception);
1768 else
1769 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
1770 }
1771 docTextPosition += len;
1772 break;
1773 }
1774 docTextPosition += len;
1775 }
1776
1777 if (!startRangeFound)
1778 return 0;
1779
1780 if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
1781 int exception = 0;
1782 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception);
1783 }
1784
1785 return resultRange.release();
1786 }
1787
1788 // --------
1789
plainTextToMallocAllocatedBuffer(const Range * r,unsigned & bufferLength,bool isDisplayString)1790 UChar* plainTextToMallocAllocatedBuffer(const Range* r, unsigned& bufferLength, bool isDisplayString)
1791 {
1792 UChar* result = 0;
1793
1794 // Do this in pieces to avoid massive reallocations if there is a large amount of text.
1795 // Use system malloc for buffers since they can consume lots of memory and current TCMalloc is unable return it back to OS.
1796 static const unsigned cMaxSegmentSize = 1 << 16;
1797 bufferLength = 0;
1798 typedef pair<UChar*, unsigned> TextSegment;
1799 Vector<TextSegment>* textSegments = 0;
1800 Vector<UChar> textBuffer;
1801 textBuffer.reserveInitialCapacity(cMaxSegmentSize);
1802 for (TextIterator it(r); !it.atEnd(); it.advance()) {
1803 if (textBuffer.size() && textBuffer.size() + it.length() > cMaxSegmentSize) {
1804 UChar* newSegmentBuffer = static_cast<UChar*>(malloc(textBuffer.size() * sizeof(UChar)));
1805 if (!newSegmentBuffer)
1806 goto exit;
1807 memcpy(newSegmentBuffer, textBuffer.data(), textBuffer.size() * sizeof(UChar));
1808 if (!textSegments)
1809 textSegments = new Vector<TextSegment>;
1810 textSegments->append(make_pair(newSegmentBuffer, (unsigned)textBuffer.size()));
1811 textBuffer.clear();
1812 }
1813 textBuffer.append(it.characters(), it.length());
1814 bufferLength += it.length();
1815 }
1816
1817 if (!bufferLength)
1818 return 0;
1819
1820 // Since we know the size now, we can make a single buffer out of the pieces with one big alloc
1821 result = static_cast<UChar*>(malloc(bufferLength * sizeof(UChar)));
1822 if (!result)
1823 goto exit;
1824
1825 {
1826 UChar* resultPos = result;
1827 if (textSegments) {
1828 unsigned size = textSegments->size();
1829 for (unsigned i = 0; i < size; ++i) {
1830 const TextSegment& segment = textSegments->at(i);
1831 memcpy(resultPos, segment.first, segment.second * sizeof(UChar));
1832 resultPos += segment.second;
1833 }
1834 }
1835 memcpy(resultPos, textBuffer.data(), textBuffer.size() * sizeof(UChar));
1836 }
1837
1838 exit:
1839 if (textSegments) {
1840 unsigned size = textSegments->size();
1841 for (unsigned i = 0; i < size; ++i)
1842 free(textSegments->at(i).first);
1843 delete textSegments;
1844 }
1845
1846 if (isDisplayString && r->ownerDocument())
1847 r->ownerDocument()->displayBufferModifiedByEncoding(result, bufferLength);
1848
1849 return result;
1850 }
1851
plainText(const Range * r)1852 String plainText(const Range* r)
1853 {
1854 unsigned length;
1855 UChar* buf = plainTextToMallocAllocatedBuffer(r, length, false);
1856 if (!buf)
1857 return "";
1858 String result(buf, length);
1859 free(buf);
1860 return result;
1861 }
1862
isAllCollapsibleWhitespace(const String & string)1863 static inline bool isAllCollapsibleWhitespace(const String& string)
1864 {
1865 const UChar* characters = string.characters();
1866 unsigned length = string.length();
1867 for (unsigned i = 0; i < length; ++i) {
1868 if (!isCollapsibleWhitespace(characters[i]))
1869 return false;
1870 }
1871 return true;
1872 }
1873
collapsedToBoundary(const Range * range,bool forward)1874 static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
1875 {
1876 ExceptionCode ec = 0;
1877 RefPtr<Range> result = range->cloneRange(ec);
1878 ASSERT(!ec);
1879 result->collapse(!forward, ec);
1880 ASSERT(!ec);
1881 return result.release();
1882 }
1883
findPlainText(CharacterIterator & it,const String & target,bool forward,bool caseSensitive,size_t & matchStart)1884 static size_t findPlainText(CharacterIterator& it, const String& target, bool forward, bool caseSensitive, size_t& matchStart)
1885 {
1886 matchStart = 0;
1887 size_t matchLength = 0;
1888
1889 SearchBuffer buffer(target, caseSensitive);
1890
1891 while (!it.atEnd()) {
1892 it.advance(buffer.append(it.characters(), it.length()));
1893 tryAgain:
1894 size_t matchStartOffset;
1895 if (size_t newMatchLength = buffer.search(matchStartOffset)) {
1896 // Note that we found a match, and where we found it.
1897 size_t lastCharacterInBufferOffset = it.characterOffset();
1898 ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
1899 matchStart = lastCharacterInBufferOffset - matchStartOffset;
1900 matchLength = newMatchLength;
1901 // If searching forward, stop on the first match.
1902 // If searching backward, don't stop, so we end up with the last match.
1903 if (forward)
1904 break;
1905 goto tryAgain;
1906 }
1907 if (it.atBreak() && !buffer.atBreak()) {
1908 buffer.reachedBreak();
1909 goto tryAgain;
1910 }
1911 }
1912
1913 return matchLength;
1914 }
1915
findPlainText(const Range * range,const String & target,bool forward,bool caseSensitive)1916 PassRefPtr<Range> findPlainText(const Range* range, const String& target, bool forward, bool caseSensitive)
1917 {
1918 // First, find the text.
1919 size_t matchStart;
1920 size_t matchLength;
1921 {
1922 CharacterIterator findIterator(range, false, true);
1923 matchLength = findPlainText(findIterator, target, forward, caseSensitive, matchStart);
1924 if (!matchLength)
1925 return collapsedToBoundary(range, forward);
1926 }
1927
1928 // Then, find the document position of the start and the end of the text.
1929 CharacterIterator computeRangeIterator(range, false, true);
1930 return characterSubrange(computeRangeIterator, matchStart, matchLength);
1931 }
1932
1933 }
1934