1 /*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
3 * Copyright (C) 2005 Alexey Proskuryakov.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "core/editing/TextIterator.h"
29
30 #include "bindings/v8/ExceptionStatePlaceholder.h"
31 #include "core/HTMLNames.h"
32 #include "core/dom/Document.h"
33 #include "core/dom/NodeTraversal.h"
34 #include "core/dom/shadow/ShadowRoot.h"
35 #include "core/editing/VisiblePosition.h"
36 #include "core/editing/VisibleUnits.h"
37 #include "core/editing/htmlediting.h"
38 #include "core/html/HTMLElement.h"
39 #include "core/html/HTMLTextFormControlElement.h"
40 #include "core/rendering/InlineTextBox.h"
41 #include "core/rendering/RenderImage.h"
42 #include "core/rendering/RenderTableCell.h"
43 #include "core/rendering/RenderTableRow.h"
44 #include "core/rendering/RenderTextControl.h"
45 #include "core/rendering/RenderTextFragment.h"
46 #include "platform/fonts/Character.h"
47 #include "platform/fonts/Font.h"
48 #include "platform/text/TextBoundaries.h"
49 #include "platform/text/TextBreakIteratorInternalICU.h"
50 #include "platform/text/UnicodeUtilities.h"
51 #include "wtf/text/CString.h"
52 #include "wtf/text/StringBuilder.h"
53 #include "wtf/unicode/CharacterNames.h"
54 #include <unicode/usearch.h>
55 #include <unicode/utf16.h>
56
57 using namespace WTF::Unicode;
58
59 namespace WebCore {
60
61 using namespace HTMLNames;
62
63 // Buffer that knows how to compare with a search target.
64 // Keeps enough of the previous text to be able to search in the future, but no more.
65 // Non-breaking spaces are always equal to normal spaces.
66 // Case folding is also done if the CaseInsensitive option is specified.
67 // Matches are further filtered if the AtWordStarts option is specified, although some
68 // matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
69 class SearchBuffer {
70 WTF_MAKE_NONCOPYABLE(SearchBuffer);
71 public:
72 SearchBuffer(const String& target, FindOptions);
73 ~SearchBuffer();
74
75 // Returns number of characters appended; guaranteed to be in the range [1, length].
76 template<typename CharType>
77 void append(const CharType*, size_t length);
numberOfCharactersJustAppended() const78 size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; }
79
80 bool needsMoreContext() const;
81 void prependContext(const UChar*, size_t length);
82 void reachedBreak();
83
84 // Result is the size in characters of what was found.
85 // And <startOffset> is the number of characters back to the start of what was found.
86 size_t search(size_t& startOffset);
87 bool atBreak() const;
88
89 private:
90 bool isBadMatch(const UChar*, size_t length) const;
91 bool isWordStartMatch(size_t start, size_t length) const;
92
93 Vector<UChar> m_target;
94 FindOptions m_options;
95
96 Vector<UChar> m_buffer;
97 size_t m_overlap;
98 size_t m_prefixLength;
99 size_t m_numberOfCharactersJustAppended;
100 bool m_atBreak;
101 bool m_needsMoreContext;
102
103 bool m_targetRequiresKanaWorkaround;
104 Vector<UChar> m_normalizedTarget;
105 mutable Vector<UChar> m_normalizedMatch;
106 };
107
108 // --------
109
110 static const unsigned bitsInWord = sizeof(unsigned) * 8;
111 static const unsigned bitInWordMask = bitsInWord - 1;
112
BitStack()113 BitStack::BitStack()
114 : m_size(0)
115 {
116 }
117
~BitStack()118 BitStack::~BitStack()
119 {
120 }
121
push(bool bit)122 void BitStack::push(bool bit)
123 {
124 unsigned index = m_size / bitsInWord;
125 unsigned shift = m_size & bitInWordMask;
126 if (!shift && index == m_words.size()) {
127 m_words.grow(index + 1);
128 m_words[index] = 0;
129 }
130 unsigned& word = m_words[index];
131 unsigned mask = 1U << shift;
132 if (bit)
133 word |= mask;
134 else
135 word &= ~mask;
136 ++m_size;
137 }
138
pop()139 void BitStack::pop()
140 {
141 if (m_size)
142 --m_size;
143 }
144
top() const145 bool BitStack::top() const
146 {
147 if (!m_size)
148 return false;
149 unsigned shift = (m_size - 1) & bitInWordMask;
150 return m_words.last() & (1U << shift);
151 }
152
size() const153 unsigned BitStack::size() const
154 {
155 return m_size;
156 }
157
158 // --------
159
160 #if ASSERT_ENABLED
161
depthCrossingShadowBoundaries(Node * node)162 static unsigned depthCrossingShadowBoundaries(Node* node)
163 {
164 unsigned depth = 0;
165 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
166 ++depth;
167 return depth;
168 }
169
170 #endif
171
172 // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
nextInPreOrderCrossingShadowBoundaries(Node * rangeEndContainer,int rangeEndOffset)173 static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
174 {
175 if (!rangeEndContainer)
176 return 0;
177 if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
178 if (Node* next = rangeEndContainer->traverseToChildAt(rangeEndOffset))
179 return next;
180 }
181 for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
182 if (Node* next = node->nextSibling())
183 return next;
184 }
185 return 0;
186 }
187
188 // --------
189
fullyClipsContents(Node * node)190 static inline bool fullyClipsContents(Node* node)
191 {
192 RenderObject* renderer = node->renderer();
193 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
194 return false;
195 return toRenderBox(renderer)->size().isEmpty();
196 }
197
ignoresContainerClip(Node * node)198 static inline bool ignoresContainerClip(Node* node)
199 {
200 RenderObject* renderer = node->renderer();
201 if (!renderer || renderer->isText())
202 return false;
203 return renderer->style()->hasOutOfFlowPosition();
204 }
205
pushFullyClippedState(BitStack & stack,Node * node)206 static void pushFullyClippedState(BitStack& stack, Node* node)
207 {
208 ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
209
210 // FIXME: m_fullyClippedStack was added in response to <https://bugs.webkit.org/show_bug.cgi?id=26364>
211 // ("Search can find text that's hidden by overflow:hidden"), but the logic here will not work correctly if
212 // a shadow tree redistributes nodes. m_fullyClippedStack relies on the assumption that DOM node hierarchy matches
213 // the render tree, which is not necessarily true if there happens to be shadow DOM distribution or other mechanics
214 // that shuffle around the render objects regardless of node tree hierarchy (like CSS flexbox).
215 //
216 // A more appropriate way to handle this situation is to detect overflow:hidden blocks by using only rendering
217 // primitives, not with DOM primitives.
218
219 // Push true if this node full clips its contents, or if a parent already has fully
220 // clipped and this is not a node that ignores its container's clip.
221 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
222 }
223
setUpFullyClippedStack(BitStack & stack,Node * node)224 static void setUpFullyClippedStack(BitStack& stack, Node* node)
225 {
226 // Put the nodes in a vector so we can iterate in reverse order.
227 Vector<Node*, 100> ancestry;
228 for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
229 ancestry.append(parent);
230
231 // Call pushFullyClippedState on each node starting with the earliest ancestor.
232 size_t size = ancestry.size();
233 for (size_t i = 0; i < size; ++i)
234 pushFullyClippedState(stack, ancestry[size - i - 1]);
235 pushFullyClippedState(stack, node);
236
237 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
238 }
239
240 // --------
241
TextIterator(const Range * range,TextIteratorBehaviorFlags behavior)242 TextIterator::TextIterator(const Range* range, TextIteratorBehaviorFlags behavior)
243 : m_startContainer(nullptr)
244 , m_startOffset(0)
245 , m_endContainer(nullptr)
246 , m_endOffset(0)
247 , m_positionNode(nullptr)
248 , m_textLength(0)
249 , m_needsAnotherNewline(false)
250 , m_textBox(0)
251 , m_remainingTextBox(0)
252 , m_firstLetterText(0)
253 , m_lastTextNode(nullptr)
254 , m_lastTextNodeEndedWithCollapsedSpace(false)
255 , m_lastCharacter(0)
256 , m_sortedTextBoxesPosition(0)
257 , m_hasEmitted(false)
258 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
259 , m_entersTextControls(behavior & TextIteratorEntersTextControls)
260 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
261 , m_handledFirstLetter(false)
262 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
263 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
264 , m_shouldStop(false)
265 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
266 , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
267 , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
268 {
269 if (range)
270 initialize(range->startPosition(), range->endPosition());
271 }
272
TextIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)273 TextIterator::TextIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
274 : m_startContainer(nullptr)
275 , m_startOffset(0)
276 , m_endContainer(nullptr)
277 , m_endOffset(0)
278 , m_positionNode(nullptr)
279 , m_textLength(0)
280 , m_needsAnotherNewline(false)
281 , m_textBox(0)
282 , m_remainingTextBox(0)
283 , m_firstLetterText(0)
284 , m_lastTextNode(nullptr)
285 , m_lastTextNodeEndedWithCollapsedSpace(false)
286 , m_lastCharacter(0)
287 , m_sortedTextBoxesPosition(0)
288 , m_hasEmitted(false)
289 , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
290 , m_entersTextControls(behavior & TextIteratorEntersTextControls)
291 , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
292 , m_handledFirstLetter(false)
293 , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
294 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
295 , m_shouldStop(false)
296 , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
297 , m_entersAuthorShadowRoots(behavior & TextIteratorEntersAuthorShadowRoots)
298 , m_emitsObjectReplacementCharacter(behavior & TextIteratorEmitsObjectReplacementCharacter)
299 {
300 initialize(start, end);
301 }
302
initialize(const Position & start,const Position & end)303 void TextIterator::initialize(const Position& start, const Position& end)
304 {
305 ASSERT(comparePositions(start, end) <= 0);
306
307 // Get and validate |start| and |end|.
308 Node* startContainer = start.containerNode();
309 if (!startContainer)
310 return;
311 int startOffset = start.computeOffsetInContainerNode();
312 Node* endContainer = end.containerNode();
313 if (!endContainer)
314 return;
315 int endOffset = end.computeOffsetInContainerNode();
316
317 // Remember the range - this does not change.
318 m_startContainer = startContainer;
319 m_startOffset = startOffset;
320 m_endContainer = endContainer;
321 m_endOffset = endOffset;
322
323 // Figure out the initial value of m_shadowDepth: the depth of startContainer's tree scope from
324 // the common ancestor tree scope.
325 const TreeScope* commonAncestorTreeScope = startContainer->treeScope().commonAncestorTreeScope(endContainer->treeScope());
326 ASSERT(commonAncestorTreeScope);
327 m_shadowDepth = 0;
328 for (const TreeScope* treeScope = &startContainer->treeScope(); treeScope != commonAncestorTreeScope; treeScope = treeScope->parentTreeScope())
329 ++m_shadowDepth;
330
331 // Set up the current node for processing.
332 if (startContainer->offsetInCharacters())
333 m_node = startContainer;
334 else if (Node* child = startContainer->traverseToChildAt(startOffset))
335 m_node = child;
336 else if (!startOffset)
337 m_node = startContainer;
338 else
339 m_node = NodeTraversal::nextSkippingChildren(*startContainer);
340
341 if (!m_node)
342 return;
343
344 setUpFullyClippedStack(m_fullyClippedStack, m_node);
345 m_offset = m_node == m_startContainer ? m_startOffset : 0;
346 m_iterationProgress = HandledNone;
347
348 // Calculate first out of bounds node.
349 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
350
351 // Identify the first run.
352 advance();
353 }
354
~TextIterator()355 TextIterator::~TextIterator()
356 {
357 }
358
advance()359 void TextIterator::advance()
360 {
361 if (m_shouldStop)
362 return;
363
364 // reset the run information
365 m_positionNode = nullptr;
366 m_textLength = 0;
367
368 // handle remembered node that needed a newline after the text node's newline
369 if (m_needsAnotherNewline) {
370 // Emit the extra newline, and position it *inside* m_node, after m_node's
371 // contents, in case it's a block, in the same way that we position the first
372 // newline. The range for the emitted newline should start where the line
373 // break begins.
374 // FIXME: It would be cleaner if we emitted two newlines during the last
375 // iteration, instead of using m_needsAnotherNewline.
376 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
377 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
378 m_needsAnotherNewline = false;
379 return;
380 }
381
382 if (!m_textBox && m_remainingTextBox) {
383 m_textBox = m_remainingTextBox;
384 m_remainingTextBox = 0;
385 m_firstLetterText = 0;
386 m_offset = 0;
387 }
388 // handle remembered text box
389 if (m_textBox) {
390 handleTextBox();
391 if (m_positionNode)
392 return;
393 }
394
395 while (m_node && (m_node != m_pastEndNode || m_shadowDepth > 0)) {
396 if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node))
397 m_shouldStop = true;
398
399 // if the range ends at offset 0 of an element, represent the
400 // position, but not the content, of that element e.g. if the
401 // node is a blockflow element, emit a newline that
402 // precedes the element
403 if (m_node == m_endContainer && !m_endOffset) {
404 representNodeOffsetZero();
405 m_node = nullptr;
406 return;
407 }
408
409 RenderObject* renderer = m_node->renderer();
410 if (!renderer) {
411 if (m_node->isShadowRoot()) {
412 // A shadow root doesn't have a renderer, but we want to visit children anyway.
413 m_iterationProgress = m_iterationProgress < HandledNode ? HandledNode : m_iterationProgress;
414 } else {
415 m_iterationProgress = HandledChildren;
416 }
417 } else {
418 // Enter author shadow roots, from youngest, if any and if necessary.
419 if (m_iterationProgress < HandledAuthorShadowRoots) {
420 if (m_entersAuthorShadowRoots && m_node->isElementNode() && toElement(m_node)->hasAuthorShadowRoot()) {
421 ShadowRoot* youngestShadowRoot = toElement(m_node)->shadowRoot();
422 ASSERT(youngestShadowRoot->type() == ShadowRoot::AuthorShadowRoot);
423 m_node = youngestShadowRoot;
424 m_iterationProgress = HandledNone;
425 ++m_shadowDepth;
426 pushFullyClippedState(m_fullyClippedStack, m_node);
427 continue;
428 }
429
430 m_iterationProgress = HandledAuthorShadowRoots;
431 }
432
433 // Enter user-agent shadow root, if necessary.
434 if (m_iterationProgress < HandledUserAgentShadowRoot) {
435 if (m_entersTextControls && renderer->isTextControl()) {
436 ShadowRoot* userAgentShadowRoot = toElement(m_node)->userAgentShadowRoot();
437 ASSERT(userAgentShadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
438 m_node = userAgentShadowRoot;
439 m_iterationProgress = HandledNone;
440 ++m_shadowDepth;
441 pushFullyClippedState(m_fullyClippedStack, m_node);
442 continue;
443 }
444 m_iterationProgress = HandledUserAgentShadowRoot;
445 }
446
447 // Handle the current node according to its type.
448 if (m_iterationProgress < HandledNode) {
449 bool handledNode = false;
450 if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { // FIXME: What about CDATA_SECTION_NODE?
451 handledNode = handleTextNode();
452 } else if (renderer && (renderer->isImage() || renderer->isWidget()
453 || (m_node && m_node->isElementNode()
454 && (toElement(m_node)->isFormControlElement()
455 || isHTMLLegendElement(toElement(*m_node))
456 || isHTMLMeterElement(toElement(*m_node))
457 || isHTMLProgressElement(toElement(*m_node)))))) {
458 handledNode = handleReplacedElement();
459 } else {
460 handledNode = handleNonTextNode();
461 }
462 if (handledNode)
463 m_iterationProgress = HandledNode;
464 if (m_positionNode)
465 return;
466 }
467 }
468
469 // Find a new current node to handle in depth-first manner,
470 // calling exitNode() as we come back thru a parent node.
471 //
472 // 1. Iterate over child nodes, if we haven't done yet.
473 Node* next = m_iterationProgress < HandledChildren ? m_node->firstChild() : 0;
474 m_offset = 0;
475 if (!next) {
476 // 2. If we've already iterated children or they are not available, go to the next sibling node.
477 next = m_node->nextSibling();
478 if (!next) {
479 // 3. If we are at the last child, go up the node tree until we find a next sibling.
480 bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode;
481 Node* parentNode = m_node->parentNode();
482 while (!next && parentNode) {
483 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
484 return;
485 bool haveRenderer = m_node->renderer();
486 m_node = parentNode;
487 m_fullyClippedStack.pop();
488 parentNode = m_node->parentNode();
489 if (haveRenderer)
490 exitNode();
491 if (m_positionNode) {
492 m_iterationProgress = HandledChildren;
493 return;
494 }
495 next = m_node->nextSibling();
496 }
497
498 if (!next && !parentNode && m_shadowDepth > 0) {
499 // 4. Reached the top of a shadow root. If it's created by author, then try to visit the next
500 // sibling shadow root, if any.
501 ShadowRoot* shadowRoot = toShadowRoot(m_node);
502 if (shadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
503 ShadowRoot* nextShadowRoot = shadowRoot->olderShadowRoot();
504 if (nextShadowRoot && nextShadowRoot->type() == ShadowRoot::AuthorShadowRoot) {
505 m_fullyClippedStack.pop();
506 m_node = nextShadowRoot;
507 m_iterationProgress = HandledNone;
508 // m_shadowDepth is unchanged since we exit from a shadow root and enter another.
509 pushFullyClippedState(m_fullyClippedStack, m_node);
510 } else {
511 // We are the last shadow root; exit from here and go back to where we were.
512 m_node = shadowRoot->host();
513 m_iterationProgress = HandledAuthorShadowRoots;
514 --m_shadowDepth;
515 m_fullyClippedStack.pop();
516 }
517 } else {
518 // If we are in a user-agent shadow root, then go back to the host.
519 ASSERT(shadowRoot->type() == ShadowRoot::UserAgentShadowRoot);
520 m_node = shadowRoot->host();
521 m_iterationProgress = HandledUserAgentShadowRoot;
522 --m_shadowDepth;
523 m_fullyClippedStack.pop();
524 }
525 m_handledFirstLetter = false;
526 m_firstLetterText = 0;
527 continue;
528 }
529 }
530 m_fullyClippedStack.pop();
531 }
532
533 // set the new current node
534 m_node = next;
535 if (m_node)
536 pushFullyClippedState(m_fullyClippedStack, m_node);
537 m_iterationProgress = HandledNone;
538 m_handledFirstLetter = false;
539 m_firstLetterText = 0;
540
541 // how would this ever be?
542 if (m_positionNode)
543 return;
544 }
545 }
546
characterAt(unsigned index) const547 UChar TextIterator::characterAt(unsigned index) const
548 {
549 ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length()));
550 if (!(index < static_cast<unsigned>(length())))
551 return 0;
552
553 if (m_singleCharacterBuffer) {
554 ASSERT(!index);
555 ASSERT(length() == 1);
556 return m_singleCharacterBuffer;
557 }
558
559 return string()[startOffset() + index];
560 }
561
substring(unsigned position,unsigned length) const562 String TextIterator::substring(unsigned position, unsigned length) const
563 {
564 ASSERT_WITH_SECURITY_IMPLICATION(position <= static_cast<unsigned>(this->length()));
565 ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length()));
566 if (!length)
567 return emptyString();
568 if (m_singleCharacterBuffer) {
569 ASSERT(!position);
570 ASSERT(length == 1);
571 return String(&m_singleCharacterBuffer, 1);
572 }
573 return string().substring(startOffset() + position, length);
574 }
575
appendTextToStringBuilder(StringBuilder & builder,unsigned position,unsigned maxLength) const576 void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const
577 {
578 unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength);
579 if (!lengthToAppend)
580 return;
581 if (m_singleCharacterBuffer) {
582 ASSERT(!position);
583 builder.append(m_singleCharacterBuffer);
584 } else {
585 builder.append(string(), startOffset() + position, lengthToAppend);
586 }
587 }
588
handleTextNode()589 bool TextIterator::handleTextNode()
590 {
591 if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
592 return false;
593
594 RenderText* renderer = toRenderText(m_node->renderer());
595
596 m_lastTextNode = m_node;
597 String str = renderer->text();
598
599 // handle pre-formatted text
600 if (!renderer->style()->collapseWhiteSpace()) {
601 int runStart = m_offset;
602 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
603 emitCharacter(' ', m_node, 0, runStart, runStart);
604 return false;
605 }
606 if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) {
607 handleTextNodeFirstLetter(toRenderTextFragment(renderer));
608 if (m_firstLetterText) {
609 String firstLetter = m_firstLetterText->text();
610 emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length());
611 m_firstLetterText = 0;
612 m_textBox = 0;
613 return false;
614 }
615 }
616 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
617 return false;
618 int strLength = str.length();
619 int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
620 int runEnd = std::min(strLength, end);
621
622 if (runStart >= runEnd)
623 return true;
624
625 emitText(m_node, runStart, runEnd);
626 return true;
627 }
628
629 if (renderer->firstTextBox())
630 m_textBox = renderer->firstTextBox();
631
632 bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset;
633 if (shouldHandleFirstLetter)
634 handleTextNodeFirstLetter(toRenderTextFragment(renderer));
635
636 if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) {
637 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
638 return false;
639 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
640 return true;
641 }
642
643 if (m_firstLetterText)
644 renderer = m_firstLetterText;
645
646 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
647 if (renderer->containsReversedText()) {
648 m_sortedTextBoxes.clear();
649 for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
650 m_sortedTextBoxes.append(textBox);
651 }
652 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
653 m_sortedTextBoxesPosition = 0;
654 m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0];
655 }
656
657 handleTextBox();
658 return true;
659 }
660
handleTextBox()661 void TextIterator::handleTextBox()
662 {
663 RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer());
664 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
665 m_textBox = 0;
666 return;
667 }
668 String str = renderer->text();
669 unsigned start = m_offset;
670 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX;
671 while (m_textBox) {
672 unsigned textBoxStart = m_textBox->start();
673 unsigned runStart = std::max(textBoxStart, start);
674
675 // Check for collapsed space at the start of this run.
676 InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
677 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
678 || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
679 if (needSpace && !renderer->style()->isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) {
680 if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
681 unsigned spaceRunStart = runStart - 1;
682 while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
683 --spaceRunStart;
684 emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
685 } else {
686 emitCharacter(' ', m_node, 0, runStart, runStart);
687 }
688 return;
689 }
690 unsigned textBoxEnd = textBoxStart + m_textBox->len();
691 unsigned runEnd = std::min(textBoxEnd, end);
692
693 // Determine what the next text box will be, but don't advance yet
694 InlineTextBox* nextTextBox = 0;
695 if (renderer->containsReversedText()) {
696 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
697 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
698 } else {
699 nextTextBox = m_textBox->nextTextBox();
700 }
701 ASSERT(!nextTextBox || nextTextBox->renderer() == renderer);
702
703 if (runStart < runEnd) {
704 // Handle either a single newline character (which becomes a space),
705 // or a run of characters that does not include a newline.
706 // This effectively translates newlines to spaces without copying the text.
707 if (str[runStart] == '\n') {
708 emitCharacter(' ', m_node, 0, runStart, runStart + 1);
709 m_offset = runStart + 1;
710 } else {
711 size_t subrunEnd = str.find('\n', runStart);
712 if (subrunEnd == kNotFound || subrunEnd > runEnd)
713 subrunEnd = runEnd;
714
715 m_offset = subrunEnd;
716 emitText(m_node, renderer, runStart, subrunEnd);
717 }
718
719 // If we are doing a subrun that doesn't go to the end of the text box,
720 // come back again to finish handling this text box; don't advance to the next one.
721 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
722 return;
723
724 // Advance and return
725 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
726 if (nextRunStart > runEnd)
727 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
728 m_textBox = nextTextBox;
729 if (renderer->containsReversedText())
730 ++m_sortedTextBoxesPosition;
731 return;
732 }
733 // Advance and continue
734 m_textBox = nextTextBox;
735 if (renderer->containsReversedText())
736 ++m_sortedTextBoxesPosition;
737 }
738 if (!m_textBox && m_remainingTextBox) {
739 m_textBox = m_remainingTextBox;
740 m_remainingTextBox = 0;
741 m_firstLetterText = 0;
742 m_offset = 0;
743 handleTextBox();
744 }
745 }
746
firstRenderTextInFirstLetter(RenderBoxModelObject * firstLetter)747 static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter)
748 {
749 if (!firstLetter)
750 return 0;
751
752 // FIXME: Should this check descendent objects?
753 for (RenderObject* current = firstLetter->slowFirstChild(); current; current = current->nextSibling()) {
754 if (current->isText())
755 return toRenderText(current);
756 }
757 return 0;
758 }
759
handleTextNodeFirstLetter(RenderTextFragment * renderer)760 void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
761 {
762 if (renderer->firstLetter()) {
763 RenderBoxModelObject* r = renderer->firstLetter();
764 if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
765 return;
766 if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) {
767 m_handledFirstLetter = true;
768 m_remainingTextBox = m_textBox;
769 m_textBox = firstLetter->firstTextBox();
770 m_sortedTextBoxes.clear();
771 m_firstLetterText = firstLetter;
772 }
773 }
774 m_handledFirstLetter = true;
775 }
776
handleReplacedElement()777 bool TextIterator::handleReplacedElement()
778 {
779 if (m_fullyClippedStack.top())
780 return false;
781
782 RenderObject* renderer = m_node->renderer();
783 if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
784 return false;
785
786 if (m_emitsObjectReplacementCharacter) {
787 emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
788 return true;
789 }
790
791 if (m_lastTextNodeEndedWithCollapsedSpace) {
792 emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
793 return false;
794 }
795
796 if (m_entersTextControls && renderer->isTextControl()) {
797 // The shadow tree should be already visited.
798 return true;
799 }
800
801 m_hasEmitted = true;
802
803 if (m_emitsCharactersBetweenAllVisiblePositions) {
804 // We want replaced elements to behave like punctuation for boundary
805 // finding, and to simply take up space for the selection preservation
806 // code in moveParagraphs, so we use a comma.
807 emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
808 return true;
809 }
810
811 m_positionNode = m_node->parentNode();
812 m_positionOffsetBaseNode = m_node;
813 m_positionStartOffset = 0;
814 m_positionEndOffset = 1;
815 m_singleCharacterBuffer = 0;
816
817 if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) {
818 m_text = toRenderImage(renderer)->altText();
819 if (!m_text.isEmpty()) {
820 m_textLength = m_text.length();
821 m_lastCharacter = m_text[m_textLength - 1];
822 return true;
823 }
824 }
825
826 m_textLength = 0;
827 m_lastCharacter = 0;
828
829 return true;
830 }
831
hasVisibleTextNode(RenderText * renderer)832 bool TextIterator::hasVisibleTextNode(RenderText* renderer)
833 {
834 if (renderer->style()->visibility() == VISIBLE)
835 return true;
836 if (renderer->isTextFragment()) {
837 RenderTextFragment* fragment = toRenderTextFragment(renderer);
838 if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
839 return true;
840 }
841 return false;
842 }
843
shouldEmitTabBeforeNode(Node * node)844 static bool shouldEmitTabBeforeNode(Node* node)
845 {
846 RenderObject* r = node->renderer();
847
848 // Table cells are delimited by tabs.
849 if (!r || !isTableCell(node))
850 return false;
851
852 // Want a tab before every cell other than the first one
853 RenderTableCell* rc = toRenderTableCell(r);
854 RenderTable* t = rc->table();
855 return t && (t->cellBefore(rc) || t->cellAbove(rc));
856 }
857
shouldEmitNewlineForNode(Node * node,bool emitsOriginalText)858 static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
859 {
860 RenderObject* renderer = node->renderer();
861
862 if (renderer ? !renderer->isBR() : !isHTMLBRElement(node))
863 return false;
864 return emitsOriginalText || !(node->isInShadowTree() && isHTMLInputElement(*node->shadowHost()));
865 }
866
shouldEmitNewlinesBeforeAndAfterNode(Node & node)867 static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node)
868 {
869 // Block flow (versus inline flow) is represented by having
870 // a newline both before and after the element.
871 RenderObject* r = node.renderer();
872 if (!r) {
873 return (node.hasTagName(blockquoteTag)
874 || node.hasTagName(ddTag)
875 || node.hasTagName(divTag)
876 || node.hasTagName(dlTag)
877 || node.hasTagName(dtTag)
878 || node.hasTagName(h1Tag)
879 || node.hasTagName(h2Tag)
880 || node.hasTagName(h3Tag)
881 || node.hasTagName(h4Tag)
882 || node.hasTagName(h5Tag)
883 || node.hasTagName(h6Tag)
884 || node.hasTagName(hrTag)
885 || node.hasTagName(liTag)
886 || node.hasTagName(listingTag)
887 || node.hasTagName(olTag)
888 || node.hasTagName(pTag)
889 || node.hasTagName(preTag)
890 || node.hasTagName(trTag)
891 || node.hasTagName(ulTag));
892 }
893
894 // Need to make an exception for table cells, because they are blocks, but we
895 // want them tab-delimited rather than having newlines before and after.
896 if (isTableCell(&node))
897 return false;
898
899 // Need to make an exception for table row elements, because they are neither
900 // "inline" or "RenderBlock", but we want newlines for them.
901 if (r->isTableRow()) {
902 RenderTable* t = toRenderTableRow(r)->table();
903 if (t && !t->isInline())
904 return true;
905 }
906
907 return !r->isInline() && r->isRenderBlock()
908 && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText();
909 }
910
shouldEmitNewlineAfterNode(Node & node)911 static bool shouldEmitNewlineAfterNode(Node& node)
912 {
913 // FIXME: It should be better but slower to create a VisiblePosition here.
914 if (!shouldEmitNewlinesBeforeAndAfterNode(node))
915 return false;
916 // Check if this is the very last renderer in the document.
917 // If so, then we should not emit a newline.
918 Node* next = &node;
919 while ((next = NodeTraversal::nextSkippingChildren(*next))) {
920 if (next->renderer())
921 return true;
922 }
923 return false;
924 }
925
shouldEmitNewlineBeforeNode(Node & node)926 static bool shouldEmitNewlineBeforeNode(Node& node)
927 {
928 return shouldEmitNewlinesBeforeAndAfterNode(node);
929 }
930
shouldEmitExtraNewlineForNode(Node * node)931 static bool shouldEmitExtraNewlineForNode(Node* node)
932 {
933 // When there is a significant collapsed bottom margin, emit an extra
934 // newline for a more realistic result. We end up getting the right
935 // result even without margin collapsing. For example: <div><p>text</p></div>
936 // will work right even if both the <div> and the <p> have bottom margins.
937 RenderObject* r = node->renderer();
938 if (!r || !r->isBox())
939 return false;
940
941 // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
942 // not to do this at all
943 if (node->hasTagName(h1Tag)
944 || node->hasTagName(h2Tag)
945 || node->hasTagName(h3Tag)
946 || node->hasTagName(h4Tag)
947 || node->hasTagName(h5Tag)
948 || node->hasTagName(h6Tag)
949 || node->hasTagName(pTag)) {
950 RenderStyle* style = r->style();
951 if (style) {
952 int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
953 int fontSize = style->fontDescription().computedPixelSize();
954 if (bottomMargin * 2 >= fontSize)
955 return true;
956 }
957 }
958
959 return false;
960 }
961
collapsedSpaceLength(RenderText * renderer,int textEnd)962 static int collapsedSpaceLength(RenderText* renderer, int textEnd)
963 {
964 const String& text = renderer->text();
965 int length = text.length();
966 for (int i = textEnd; i < length; ++i) {
967 if (!renderer->style()->isCollapsibleWhiteSpace(text[i]))
968 return i - textEnd;
969 }
970
971 return length - textEnd;
972 }
973
maxOffsetIncludingCollapsedSpaces(Node * node)974 static int maxOffsetIncludingCollapsedSpaces(Node* node)
975 {
976 int offset = caretMaxOffset(node);
977
978 if (node->renderer() && node->renderer()->isText())
979 offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
980
981 return offset;
982 }
983
984 // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
shouldRepresentNodeOffsetZero()985 bool TextIterator::shouldRepresentNodeOffsetZero()
986 {
987 if (m_emitsCharactersBetweenAllVisiblePositions && isRenderedTable(m_node))
988 return true;
989
990 // Leave element positioned flush with start of a paragraph
991 // (e.g. do not insert tab before a table cell at the start of a paragraph)
992 if (m_lastCharacter == '\n')
993 return false;
994
995 // Otherwise, show the position if we have emitted any characters
996 if (m_hasEmitted)
997 return true;
998
999 // We've not emitted anything yet. Generally, there is no need for any positioning then.
1000 // The only exception is when the element is visually not in the same line as
1001 // the start of the range (e.g. the range starts at the end of the previous paragraph).
1002 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
1003 // make quicker checks to possibly avoid that. Another check that we could make is
1004 // is whether the inline vs block flow changed since the previous visible element.
1005 // I think we're already in a special enough case that that won't be needed, tho.
1006
1007 // No character needed if this is the first node in the range.
1008 if (m_node == m_startContainer)
1009 return false;
1010
1011 // If we are outside the start container's subtree, assume we need to emit.
1012 // FIXME: m_startContainer could be an inline block
1013 if (!m_node->isDescendantOf(m_startContainer))
1014 return true;
1015
1016 // If we started as m_startContainer offset 0 and the current node is a descendant of
1017 // the start container, we already had enough context to correctly decide whether to
1018 // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
1019 // so don't second guess that now.
1020 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
1021 // immaterial since we likely would have already emitted something by now.
1022 if (!m_startOffset)
1023 return false;
1024
1025 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
1026 // Additionally, if the range we are iterating over contains huge sections of unrendered content,
1027 // we would create VisiblePositions on every call to this function without this check.
1028 if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE
1029 || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !isHTMLBodyElement(*m_node)))
1030 return false;
1031
1032 // The startPos.isNotNull() check is needed because the start could be before the body,
1033 // and in that case we'll get null. We don't want to put in newlines at the start in that case.
1034 // The currPos.isNotNull() check is needed because positions in non-HTML content
1035 // (like SVG) do not have visible positions, and we don't want to emit for them either.
1036 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
1037 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
1038 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
1039 }
1040
shouldEmitSpaceBeforeAndAfterNode(Node * node)1041 bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
1042 {
1043 return isRenderedTable(node) && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
1044 }
1045
representNodeOffsetZero()1046 void TextIterator::representNodeOffsetZero()
1047 {
1048 // Emit a character to show the positioning of m_node.
1049
1050 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
1051 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks
1052 // on m_node to see if it necessitates emitting a character first and will early return
1053 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
1054 if (shouldEmitTabBeforeNode(m_node)) {
1055 if (shouldRepresentNodeOffsetZero())
1056 emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
1057 } else if (shouldEmitNewlineBeforeNode(*m_node)) {
1058 if (shouldRepresentNodeOffsetZero())
1059 emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
1060 } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
1061 if (shouldRepresentNodeOffsetZero())
1062 emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
1063 }
1064 }
1065
handleNonTextNode()1066 bool TextIterator::handleNonTextNode()
1067 {
1068 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText))
1069 emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
1070 else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
1071 emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
1072 else
1073 representNodeOffsetZero();
1074
1075 return true;
1076 }
1077
exitNode()1078 void TextIterator::exitNode()
1079 {
1080 // prevent emitting a newline when exiting a collapsed block at beginning of the range
1081 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
1082 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
1083 // therefore look like a blank line.
1084 if (!m_hasEmitted)
1085 return;
1086
1087 // Emit with a position *inside* m_node, after m_node's contents, in
1088 // case it is a block, because the run should start where the
1089 // emitted character is positioned visually.
1090 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node.get();
1091 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
1092 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use
1093 // TextIterator in _web_attributedStringFromRange.
1094 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
1095 if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) {
1096 // use extra newline to represent margin bottom, as needed
1097 bool addNewline = shouldEmitExtraNewlineForNode(m_node);
1098
1099 // FIXME: We need to emit a '\n' as we leave an empty block(s) that
1100 // contain a VisiblePosition when doing selection preservation.
1101 if (m_lastCharacter != '\n') {
1102 // insert a newline with a position following this block's contents.
1103 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
1104 // remember whether to later add a newline for the current node
1105 ASSERT(!m_needsAnotherNewline);
1106 m_needsAnotherNewline = addNewline;
1107 } else if (addNewline) {
1108 // insert a newline with a position following this block's contents.
1109 emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
1110 }
1111 }
1112
1113 // If nothing was emitted, see if we need to emit a space.
1114 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
1115 emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
1116 }
1117
emitCharacter(UChar c,Node * textNode,Node * offsetBaseNode,int textStartOffset,int textEndOffset)1118 void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
1119 {
1120 m_hasEmitted = true;
1121
1122 // remember information with which to construct the TextIterator::range()
1123 // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
1124 m_positionNode = textNode;
1125 m_positionOffsetBaseNode = offsetBaseNode;
1126 m_positionStartOffset = textStartOffset;
1127 m_positionEndOffset = textEndOffset;
1128
1129 // remember information with which to construct the TextIterator::characters() and length()
1130 m_singleCharacterBuffer = c;
1131 ASSERT(m_singleCharacterBuffer);
1132 m_textLength = 1;
1133
1134 // remember some iteration state
1135 m_lastTextNodeEndedWithCollapsedSpace = false;
1136 m_lastCharacter = c;
1137 }
1138
emitText(Node * textNode,RenderObject * renderObject,int textStartOffset,int textEndOffset)1139 void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset)
1140 {
1141 RenderText* renderer = toRenderText(renderObject);
1142 m_text = m_emitsOriginalText ? renderer->originalText() : renderer->text();
1143 ASSERT(!m_text.isEmpty());
1144 ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length()));
1145 ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length()));
1146 ASSERT(textStartOffset <= textEndOffset);
1147
1148 m_positionNode = textNode;
1149 m_positionOffsetBaseNode = nullptr;
1150 m_positionStartOffset = textStartOffset;
1151 m_positionEndOffset = textEndOffset;
1152 m_singleCharacterBuffer = 0;
1153 m_textLength = textEndOffset - textStartOffset;
1154 m_lastCharacter = m_text[textEndOffset - 1];
1155
1156 m_lastTextNodeEndedWithCollapsedSpace = false;
1157 m_hasEmitted = true;
1158 }
1159
emitText(Node * textNode,int textStartOffset,int textEndOffset)1160 void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
1161 {
1162 emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset);
1163 }
1164
range() const1165 PassRefPtrWillBeRawPtr<Range> TextIterator::range() const
1166 {
1167 // use the current run information, if we have it
1168 if (m_positionNode) {
1169 if (m_positionOffsetBaseNode) {
1170 int index = m_positionOffsetBaseNode->nodeIndex();
1171 m_positionStartOffset += index;
1172 m_positionEndOffset += index;
1173 m_positionOffsetBaseNode = nullptr;
1174 }
1175 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1176 }
1177
1178 // otherwise, return the end of the overall range we were given
1179 if (m_endContainer)
1180 return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
1181
1182 return nullptr;
1183 }
1184
node() const1185 Node* TextIterator::node() const
1186 {
1187 RefPtrWillBeRawPtr<Range> textRange = range();
1188 if (!textRange)
1189 return 0;
1190
1191 Node* node = textRange->startContainer();
1192 if (!node)
1193 return 0;
1194 if (node->offsetInCharacters())
1195 return node;
1196
1197 return node->traverseToChildAt(textRange->startOffset());
1198 }
1199
1200 // --------
1201
SimplifiedBackwardsTextIterator(const Range * r,TextIteratorBehaviorFlags behavior)1202 SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehaviorFlags behavior)
1203 : m_node(nullptr)
1204 , m_offset(0)
1205 , m_handledNode(false)
1206 , m_handledChildren(false)
1207 , m_startNode(nullptr)
1208 , m_startOffset(0)
1209 , m_endNode(nullptr)
1210 , m_endOffset(0)
1211 , m_positionNode(nullptr)
1212 , m_positionStartOffset(0)
1213 , m_positionEndOffset(0)
1214 , m_textOffset(0)
1215 , m_textLength(0)
1216 , m_lastTextNode(nullptr)
1217 , m_lastCharacter(0)
1218 , m_singleCharacterBuffer(0)
1219 , m_havePassedStartNode(false)
1220 , m_shouldHandleFirstLetter(false)
1221 , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
1222 , m_shouldStop(false)
1223 , m_emitsOriginalText(false)
1224 {
1225 ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
1226
1227 if (!r)
1228 return;
1229
1230 Node* startNode = r->startContainer();
1231 if (!startNode)
1232 return;
1233 Node* endNode = r->endContainer();
1234 int startOffset = r->startOffset();
1235 int endOffset = r->endOffset();
1236
1237 if (!startNode->offsetInCharacters() && startOffset >= 0) {
1238 // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior
1239 // instead of calling countChildren() to avoid traversing the children twice.
1240 if (Node* childAtOffset = startNode->traverseToChildAt(startOffset)) {
1241 startNode = childAtOffset;
1242 startOffset = 0;
1243 }
1244 }
1245 if (!endNode->offsetInCharacters() && endOffset > 0) {
1246 // traverseToChildAt() will return 0 if the offset is out of range. We rely on this behavior
1247 // instead of calling countChildren() to avoid traversing the children twice.
1248 if (Node* childAtOffset = endNode->traverseToChildAt(endOffset - 1)) {
1249 endNode = childAtOffset;
1250 endOffset = lastOffsetInNode(endNode);
1251 }
1252 }
1253
1254 m_node = endNode;
1255 setUpFullyClippedStack(m_fullyClippedStack, m_node);
1256 m_offset = endOffset;
1257 m_handledNode = false;
1258 m_handledChildren = !endOffset;
1259
1260 m_startNode = startNode;
1261 m_startOffset = startOffset;
1262 m_endNode = endNode;
1263 m_endOffset = endOffset;
1264
1265 #ifndef NDEBUG
1266 // Need this just because of the assert.
1267 m_positionNode = endNode;
1268 #endif
1269
1270 m_lastTextNode = nullptr;
1271 m_lastCharacter = '\n';
1272
1273 m_havePassedStartNode = false;
1274
1275 advance();
1276 }
1277
advance()1278 void SimplifiedBackwardsTextIterator::advance()
1279 {
1280 ASSERT(m_positionNode);
1281
1282 if (m_shouldStop)
1283 return;
1284
1285 if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) {
1286 m_shouldStop = true;
1287 return;
1288 }
1289
1290 m_positionNode = nullptr;
1291 m_textLength = 0;
1292
1293 while (m_node && !m_havePassedStartNode) {
1294 // Don't handle node if we start iterating at [node, 0].
1295 if (!m_handledNode && !(m_node == m_endNode && !m_endOffset)) {
1296 RenderObject* renderer = m_node->renderer();
1297 if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
1298 // FIXME: What about CDATA_SECTION_NODE?
1299 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1300 m_handledNode = handleTextNode();
1301 } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
1302 if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1303 m_handledNode = handleReplacedElement();
1304 } else {
1305 m_handledNode = handleNonTextNode();
1306 }
1307 if (m_positionNode)
1308 return;
1309 }
1310
1311 if (!m_handledChildren && m_node->hasChildren()) {
1312 m_node = m_node->lastChild();
1313 pushFullyClippedState(m_fullyClippedStack, m_node);
1314 } else {
1315 // Exit empty containers as we pass over them or containers
1316 // where [container, 0] is where we started iterating.
1317 if (!m_handledNode
1318 && canHaveChildrenForEditing(m_node)
1319 && m_node->parentNode()
1320 && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
1321 exitNode();
1322 if (m_positionNode) {
1323 m_handledNode = true;
1324 m_handledChildren = true;
1325 return;
1326 }
1327 }
1328
1329 // Exit all other containers.
1330 while (!m_node->previousSibling()) {
1331 if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
1332 break;
1333 m_fullyClippedStack.pop();
1334 exitNode();
1335 if (m_positionNode) {
1336 m_handledNode = true;
1337 m_handledChildren = true;
1338 return;
1339 }
1340 }
1341
1342 m_fullyClippedStack.pop();
1343 if (advanceRespectingRange(m_node->previousSibling()))
1344 pushFullyClippedState(m_fullyClippedStack, m_node);
1345 else
1346 m_node = nullptr;
1347 }
1348
1349 // For the purpose of word boundary detection,
1350 // we should iterate all visible text and trailing (collapsed) whitespaces.
1351 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
1352 m_handledNode = false;
1353 m_handledChildren = false;
1354
1355 if (m_positionNode)
1356 return;
1357 }
1358 }
1359
handleTextNode()1360 bool SimplifiedBackwardsTextIterator::handleTextNode()
1361 {
1362 m_lastTextNode = m_node;
1363
1364 int startOffset;
1365 int offsetInNode;
1366 RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
1367 if (!renderer)
1368 return true;
1369
1370 String text = renderer->text();
1371 if (!renderer->firstTextBox() && text.length() > 0)
1372 return true;
1373
1374 m_positionEndOffset = m_offset;
1375 m_offset = startOffset + offsetInNode;
1376 m_positionNode = m_node;
1377 m_positionStartOffset = m_offset;
1378
1379 ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length()));
1380 ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
1381 ASSERT(m_positionStartOffset <= m_positionEndOffset);
1382
1383 m_textLength = m_positionEndOffset - m_positionStartOffset;
1384 m_textOffset = m_positionStartOffset - offsetInNode;
1385 m_textContainer = text;
1386 m_singleCharacterBuffer = 0;
1387 RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length());
1388
1389 m_lastCharacter = text[m_positionEndOffset - 1];
1390
1391 return !m_shouldHandleFirstLetter;
1392 }
1393
handleFirstLetter(int & startOffset,int & offsetInNode)1394 RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
1395 {
1396 RenderText* renderer = toRenderText(m_node->renderer());
1397 startOffset = (m_node == m_startNode) ? m_startOffset : 0;
1398
1399 if (!renderer->isTextFragment()) {
1400 offsetInNode = 0;
1401 return renderer;
1402 }
1403
1404 RenderTextFragment* fragment = toRenderTextFragment(renderer);
1405 int offsetAfterFirstLetter = fragment->start();
1406 if (startOffset >= offsetAfterFirstLetter) {
1407 ASSERT(!m_shouldHandleFirstLetter);
1408 offsetInNode = offsetAfterFirstLetter;
1409 return renderer;
1410 }
1411
1412 if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) {
1413 m_shouldHandleFirstLetter = true;
1414 offsetInNode = offsetAfterFirstLetter;
1415 return renderer;
1416 }
1417
1418 m_shouldHandleFirstLetter = false;
1419 offsetInNode = 0;
1420 RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment->firstLetter());
1421
1422 m_offset = firstLetterRenderer->caretMaxOffset();
1423 m_offset += collapsedSpaceLength(firstLetterRenderer, m_offset);
1424
1425 return firstLetterRenderer;
1426 }
1427
handleReplacedElement()1428 bool SimplifiedBackwardsTextIterator::handleReplacedElement()
1429 {
1430 unsigned index = m_node->nodeIndex();
1431 // We want replaced elements to behave like punctuation for boundary
1432 // finding, and to simply take up space for the selection preservation
1433 // code in moveParagraphs, so we use a comma. Unconditionally emit
1434 // here because this iterator is only used for boundary finding.
1435 emitCharacter(',', m_node->parentNode(), index, index + 1);
1436 return true;
1437 }
1438
handleNonTextNode()1439 bool SimplifiedBackwardsTextIterator::handleNonTextNode()
1440 {
1441 // We can use a linefeed in place of a tab because this simple iterator is only used to
1442 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
1443 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
1444 unsigned index = m_node->nodeIndex();
1445 // The start of this emitted range is wrong. Ensuring correctness would require
1446 // VisiblePositions and so would be slow. previousBoundary expects this.
1447 emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
1448 }
1449 return true;
1450 }
1451
exitNode()1452 void SimplifiedBackwardsTextIterator::exitNode()
1453 {
1454 if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(m_node)) {
1455 // The start of this emitted range is wrong. Ensuring correctness would require
1456 // VisiblePositions and so would be slow. previousBoundary expects this.
1457 emitCharacter('\n', m_node, 0, 0);
1458 }
1459 }
1460
emitCharacter(UChar c,Node * node,int startOffset,int endOffset)1461 void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
1462 {
1463 m_singleCharacterBuffer = c;
1464 m_positionNode = node;
1465 m_positionStartOffset = startOffset;
1466 m_positionEndOffset = endOffset;
1467 m_textOffset = 0;
1468 m_textLength = 1;
1469 m_lastCharacter = c;
1470 }
1471
advanceRespectingRange(Node * next)1472 bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
1473 {
1474 if (!next)
1475 return false;
1476 m_havePassedStartNode |= m_node == m_startNode;
1477 if (m_havePassedStartNode)
1478 return false;
1479 m_node = next;
1480 return true;
1481 }
1482
range() const1483 PassRefPtrWillBeRawPtr<Range> SimplifiedBackwardsTextIterator::range() const
1484 {
1485 if (m_positionNode)
1486 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1487
1488 return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
1489 }
1490
1491 // --------
1492
CharacterIterator(const Range * range,TextIteratorBehaviorFlags behavior)1493 CharacterIterator::CharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
1494 : m_offset(0)
1495 , m_runOffset(0)
1496 , m_atBreak(true)
1497 , m_textIterator(range, behavior)
1498 {
1499 initialize();
1500 }
1501
CharacterIterator(const Position & start,const Position & end,TextIteratorBehaviorFlags behavior)1502 CharacterIterator::CharacterIterator(const Position& start, const Position& end, TextIteratorBehaviorFlags behavior)
1503 : m_offset(0)
1504 , m_runOffset(0)
1505 , m_atBreak(true)
1506 , m_textIterator(start, end, behavior)
1507 {
1508 initialize();
1509 }
1510
initialize()1511 void CharacterIterator::initialize()
1512 {
1513 while (!atEnd() && !m_textIterator.length())
1514 m_textIterator.advance();
1515 }
1516
range() const1517 PassRefPtrWillBeRawPtr<Range> CharacterIterator::range() const
1518 {
1519 RefPtrWillBeRawPtr<Range> r = m_textIterator.range();
1520 if (!m_textIterator.atEnd()) {
1521 if (m_textIterator.length() <= 1) {
1522 ASSERT(!m_runOffset);
1523 } else {
1524 Node* n = r->startContainer();
1525 ASSERT(n == r->endContainer());
1526 int offset = r->startOffset() + m_runOffset;
1527 r->setStart(n, offset, ASSERT_NO_EXCEPTION);
1528 r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION);
1529 }
1530 }
1531 return r.release();
1532 }
1533
advance(int count)1534 void CharacterIterator::advance(int count)
1535 {
1536 if (count <= 0) {
1537 ASSERT(!count);
1538 return;
1539 }
1540
1541 m_atBreak = false;
1542
1543 // easy if there is enough left in the current m_textIterator run
1544 int remaining = m_textIterator.length() - m_runOffset;
1545 if (count < remaining) {
1546 m_runOffset += count;
1547 m_offset += count;
1548 return;
1549 }
1550
1551 // exhaust the current m_textIterator run
1552 count -= remaining;
1553 m_offset += remaining;
1554
1555 // move to a subsequent m_textIterator run
1556 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1557 int runLength = m_textIterator.length();
1558 if (!runLength) {
1559 m_atBreak = true;
1560 } else {
1561 // see whether this is m_textIterator to use
1562 if (count < runLength) {
1563 m_runOffset = count;
1564 m_offset += count;
1565 return;
1566 }
1567
1568 // exhaust this m_textIterator run
1569 count -= runLength;
1570 m_offset += runLength;
1571 }
1572 }
1573
1574 // ran to the end of the m_textIterator... no more runs left
1575 m_atBreak = true;
1576 m_runOffset = 0;
1577 }
1578
calculateCharacterSubrange(CharacterIterator & it,int offset,int length,Position & startPosition,Position & endPosition)1579 static void calculateCharacterSubrange(CharacterIterator& it, int offset, int length, Position& startPosition, Position& endPosition)
1580 {
1581 it.advance(offset);
1582 RefPtrWillBeRawPtr<Range> start = it.range();
1583 startPosition = start->startPosition();
1584
1585 if (length > 1)
1586 it.advance(length - 1);
1587 RefPtrWillBeRawPtr<Range> end = it.range();
1588 endPosition = end->endPosition();
1589 }
1590
BackwardsCharacterIterator(const Range * range,TextIteratorBehaviorFlags behavior)1591 BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehaviorFlags behavior)
1592 : m_offset(0)
1593 , m_runOffset(0)
1594 , m_atBreak(true)
1595 , m_textIterator(range, behavior)
1596 {
1597 while (!atEnd() && !m_textIterator.length())
1598 m_textIterator.advance();
1599 }
1600
range() const1601 PassRefPtrWillBeRawPtr<Range> BackwardsCharacterIterator::range() const
1602 {
1603 RefPtrWillBeRawPtr<Range> r = m_textIterator.range();
1604 if (!m_textIterator.atEnd()) {
1605 if (m_textIterator.length() <= 1) {
1606 ASSERT(!m_runOffset);
1607 } else {
1608 Node* n = r->startContainer();
1609 ASSERT(n == r->endContainer());
1610 int offset = r->endOffset() - m_runOffset;
1611 r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION);
1612 r->setEnd(n, offset, ASSERT_NO_EXCEPTION);
1613 }
1614 }
1615 return r.release();
1616 }
1617
advance(int count)1618 void BackwardsCharacterIterator::advance(int count)
1619 {
1620 if (count <= 0) {
1621 ASSERT(!count);
1622 return;
1623 }
1624
1625 m_atBreak = false;
1626
1627 int remaining = m_textIterator.length() - m_runOffset;
1628 if (count < remaining) {
1629 m_runOffset += count;
1630 m_offset += count;
1631 return;
1632 }
1633
1634 count -= remaining;
1635 m_offset += remaining;
1636
1637 for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1638 int runLength = m_textIterator.length();
1639 if (!runLength) {
1640 m_atBreak = true;
1641 } else {
1642 if (count < runLength) {
1643 m_runOffset = count;
1644 m_offset += count;
1645 return;
1646 }
1647
1648 count -= runLength;
1649 m_offset += runLength;
1650 }
1651 }
1652
1653 m_atBreak = true;
1654 m_runOffset = 0;
1655 }
1656
1657 // --------
1658
WordAwareIterator(const Range * range)1659 WordAwareIterator::WordAwareIterator(const Range* range)
1660 : m_didLookAhead(true) // So we consider the first chunk from the text iterator.
1661 , m_textIterator(range)
1662 {
1663 advance(); // Get in position over the first chunk of text.
1664 }
1665
~WordAwareIterator()1666 WordAwareIterator::~WordAwareIterator()
1667 {
1668 }
1669
1670 // FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
1671
advance()1672 void WordAwareIterator::advance()
1673 {
1674 m_buffer.clear();
1675
1676 // If last time we did a look-ahead, start with that looked-ahead chunk now
1677 if (!m_didLookAhead) {
1678 ASSERT(!m_textIterator.atEnd());
1679 m_textIterator.advance();
1680 }
1681 m_didLookAhead = false;
1682
1683 // Go to next non-empty chunk.
1684 while (!m_textIterator.atEnd() && !m_textIterator.length())
1685 m_textIterator.advance();
1686
1687 m_range = m_textIterator.range();
1688
1689 if (m_textIterator.atEnd())
1690 return;
1691
1692 while (1) {
1693 // If this chunk ends in whitespace we can just use it as our chunk.
1694 if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1)))
1695 return;
1696
1697 // If this is the first chunk that failed, save it in m_buffer before look ahead.
1698 if (m_buffer.isEmpty())
1699 m_textIterator.appendTextTo(m_buffer);
1700
1701 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
1702 m_textIterator.advance();
1703 if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) {
1704 m_didLookAhead = true;
1705 return;
1706 }
1707
1708 // Start gobbling chunks until we get to a suitable stopping point
1709 m_textIterator.appendTextTo(m_buffer);
1710 m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION);
1711 }
1712 }
1713
length() const1714 int WordAwareIterator::length() const
1715 {
1716 if (!m_buffer.isEmpty())
1717 return m_buffer.size();
1718 return m_textIterator.length();
1719 }
1720
substring(unsigned position,unsigned length) const1721 String WordAwareIterator::substring(unsigned position, unsigned length) const
1722 {
1723 if (!m_buffer.isEmpty())
1724 return String(m_buffer.data() + position, length);
1725 return m_textIterator.substring(position, length);
1726 }
1727
characterAt(unsigned index) const1728 UChar WordAwareIterator::characterAt(unsigned index) const
1729 {
1730 if (!m_buffer.isEmpty())
1731 return m_buffer[index];
1732 return m_textIterator.characterAt(index);
1733 }
1734
1735 // --------
1736
1737 static const size_t minimumSearchBufferSize = 8192;
1738
1739 #ifndef NDEBUG
1740 static bool searcherInUse;
1741 #endif
1742
createSearcher()1743 static UStringSearch* createSearcher()
1744 {
1745 // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
1746 // but it doesn't matter exactly what it is, since we don't perform any searches
1747 // without setting both the pattern and the text.
1748 UErrorCode status = U_ZERO_ERROR;
1749 String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
1750 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
1751 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1752 return searcher;
1753 }
1754
searcher()1755 static UStringSearch* searcher()
1756 {
1757 static UStringSearch* searcher = createSearcher();
1758 return searcher;
1759 }
1760
lockSearcher()1761 static inline void lockSearcher()
1762 {
1763 #ifndef NDEBUG
1764 ASSERT(!searcherInUse);
1765 searcherInUse = true;
1766 #endif
1767 }
1768
unlockSearcher()1769 static inline void unlockSearcher()
1770 {
1771 #ifndef NDEBUG
1772 ASSERT(searcherInUse);
1773 searcherInUse = false;
1774 #endif
1775 }
1776
SearchBuffer(const String & target,FindOptions options)1777 inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
1778 : m_options(options)
1779 , m_prefixLength(0)
1780 , m_numberOfCharactersJustAppended(0)
1781 , m_atBreak(true)
1782 , m_needsMoreContext(options & AtWordStarts)
1783 , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
1784 {
1785 ASSERT(!target.isEmpty());
1786 target.appendTo(m_target);
1787
1788 // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
1789 // of doing it in a separate replacement pass here, but ICU doesn't offer a way
1790 // to add tailoring on top of the locale-specific tailoring as of this writing.
1791 foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());
1792
1793 size_t targetLength = m_target.size();
1794 m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
1795 m_overlap = m_buffer.capacity() / 4;
1796
1797 if ((m_options & AtWordStarts) && targetLength) {
1798 UChar32 targetFirstCharacter;
1799 U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
1800 // Characters in the separator category never really occur at the beginning of a word,
1801 // so if the target begins with such a character, we just ignore the AtWordStart option.
1802 if (isSeparator(targetFirstCharacter)) {
1803 m_options &= ~AtWordStarts;
1804 m_needsMoreContext = false;
1805 }
1806 }
1807
1808 // Grab the single global searcher.
1809 // If we ever have a reason to do more than once search buffer at once, we'll have
1810 // to move to multiple searchers.
1811 lockSearcher();
1812
1813 UStringSearch* searcher = WebCore::searcher();
1814 UCollator* collator = usearch_getCollator(searcher);
1815
1816 UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
1817 if (ucol_getStrength(collator) != strength) {
1818 ucol_setStrength(collator, strength);
1819 usearch_reset(searcher);
1820 }
1821
1822 UErrorCode status = U_ZERO_ERROR;
1823 usearch_setPattern(searcher, m_target.data(), targetLength, &status);
1824 ASSERT(status == U_ZERO_ERROR);
1825
1826 // The kana workaround requires a normalized copy of the target string.
1827 if (m_targetRequiresKanaWorkaround)
1828 normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
1829 }
1830
~SearchBuffer()1831 inline SearchBuffer::~SearchBuffer()
1832 {
1833 // Leave the static object pointing to a valid string.
1834 UErrorCode status = U_ZERO_ERROR;
1835 usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status);
1836 ASSERT(status == U_ZERO_ERROR);
1837
1838 unlockSearcher();
1839 }
1840
1841 template<typename CharType>
append(const CharType * characters,size_t length)1842 inline void SearchBuffer::append(const CharType* characters, size_t length)
1843 {
1844 ASSERT(length);
1845
1846 if (m_atBreak) {
1847 m_buffer.shrink(0);
1848 m_prefixLength = 0;
1849 m_atBreak = false;
1850 } else if (m_buffer.size() == m_buffer.capacity()) {
1851 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
1852 m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
1853 m_buffer.shrink(m_overlap);
1854 }
1855
1856 size_t oldLength = m_buffer.size();
1857 size_t usableLength = std::min(m_buffer.capacity() - oldLength, length);
1858 ASSERT(usableLength);
1859 m_buffer.resize(oldLength + usableLength);
1860 UChar* destination = m_buffer.data() + oldLength;
1861 StringImpl::copyChars(destination, characters, usableLength);
1862 foldQuoteMarksAndSoftHyphens(destination, usableLength);
1863 m_numberOfCharactersJustAppended = usableLength;
1864 }
1865
needsMoreContext() const1866 inline bool SearchBuffer::needsMoreContext() const
1867 {
1868 return m_needsMoreContext;
1869 }
1870
prependContext(const UChar * characters,size_t length)1871 inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
1872 {
1873 ASSERT(m_needsMoreContext);
1874 ASSERT(m_prefixLength == m_buffer.size());
1875
1876 if (!length)
1877 return;
1878
1879 m_atBreak = false;
1880
1881 size_t wordBoundaryContextStart = length;
1882 if (wordBoundaryContextStart) {
1883 U16_BACK_1(characters, 0, wordBoundaryContextStart);
1884 wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
1885 }
1886
1887 size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
1888 m_buffer.prepend(characters + length - usableLength, usableLength);
1889 m_prefixLength += usableLength;
1890
1891 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
1892 m_needsMoreContext = false;
1893 }
1894
atBreak() const1895 inline bool SearchBuffer::atBreak() const
1896 {
1897 return m_atBreak;
1898 }
1899
reachedBreak()1900 inline void SearchBuffer::reachedBreak()
1901 {
1902 m_atBreak = true;
1903 }
1904
isBadMatch(const UChar * match,size_t matchLength) const1905 inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
1906 {
1907 // This function implements the kana workaround. If usearch treats
1908 // it as a match, but we do not want to, then it's a "bad match".
1909 if (!m_targetRequiresKanaWorkaround)
1910 return false;
1911
1912 // Normalize into a match buffer. We reuse a single buffer rather than
1913 // creating a new one each time.
1914 normalizeCharactersIntoNFCForm(match, matchLength, m_normalizedMatch);
1915
1916 return !checkOnlyKanaLettersInStrings(m_normalizedTarget.begin(), m_normalizedTarget.size(), m_normalizedMatch.begin(), m_normalizedMatch.size());
1917 }
1918
isWordStartMatch(size_t start,size_t length) const1919 inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
1920 {
1921 ASSERT(m_options & AtWordStarts);
1922
1923 if (!start)
1924 return true;
1925
1926 int size = m_buffer.size();
1927 int offset = start;
1928 UChar32 firstCharacter;
1929 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
1930
1931 if (m_options & TreatMedialCapitalAsWordStart) {
1932 UChar32 previousCharacter;
1933 U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
1934
1935 if (isSeparator(firstCharacter)) {
1936 // The start of a separator run is a word start (".org" in "webkit.org").
1937 if (!isSeparator(previousCharacter))
1938 return true;
1939 } else if (isASCIIUpper(firstCharacter)) {
1940 // The start of an uppercase run is a word start ("Kit" in "WebKit").
1941 if (!isASCIIUpper(previousCharacter))
1942 return true;
1943 // The last character of an uppercase run followed by a non-separator, non-digit
1944 // is a word start ("Request" in "XMLHTTPRequest").
1945 offset = start;
1946 U16_FWD_1(m_buffer.data(), offset, size);
1947 UChar32 nextCharacter = 0;
1948 if (offset < size)
1949 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
1950 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
1951 return true;
1952 } else if (isASCIIDigit(firstCharacter)) {
1953 // The start of a digit run is a word start ("2" in "WebKit2").
1954 if (!isASCIIDigit(previousCharacter))
1955 return true;
1956 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
1957 // The start of a non-separator, non-uppercase, non-digit run is a word start,
1958 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
1959 return true;
1960 }
1961 }
1962
1963 // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
1964 // a word, so treat the position before any CJK character as a word start.
1965 if (Character::isCJKIdeographOrSymbol(firstCharacter))
1966 return true;
1967
1968 size_t wordBreakSearchStart = start + length;
1969 while (wordBreakSearchStart > start)
1970 wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
1971 return wordBreakSearchStart == start;
1972 }
1973
search(size_t & start)1974 inline size_t SearchBuffer::search(size_t& start)
1975 {
1976 size_t size = m_buffer.size();
1977 if (m_atBreak) {
1978 if (!size)
1979 return 0;
1980 } else {
1981 if (size != m_buffer.capacity())
1982 return 0;
1983 }
1984
1985 UStringSearch* searcher = WebCore::searcher();
1986
1987 UErrorCode status = U_ZERO_ERROR;
1988 usearch_setText(searcher, m_buffer.data(), size, &status);
1989 ASSERT(status == U_ZERO_ERROR);
1990
1991 usearch_setOffset(searcher, m_prefixLength, &status);
1992 ASSERT(status == U_ZERO_ERROR);
1993
1994 int matchStart = usearch_next(searcher, &status);
1995 ASSERT(status == U_ZERO_ERROR);
1996
1997 nextMatch:
1998 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
1999 ASSERT(matchStart == USEARCH_DONE);
2000 return 0;
2001 }
2002
2003 // Matches that start in the overlap area are only tentative.
2004 // The same match may appear later, matching more characters,
2005 // possibly including a combining character that's not yet in the buffer.
2006 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
2007 size_t overlap = m_overlap;
2008 if (m_options & AtWordStarts) {
2009 // Ensure that there is sufficient context before matchStart the next time around for
2010 // determining if it is at a word boundary.
2011 int wordBoundaryContextStart = matchStart;
2012 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
2013 wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
2014 overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
2015 }
2016 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
2017 m_prefixLength -= std::min(m_prefixLength, size - overlap);
2018 m_buffer.shrink(overlap);
2019 return 0;
2020 }
2021
2022 size_t matchedLength = usearch_getMatchedLength(searcher);
2023 ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
2024
2025 // If this match is "bad", move on to the next match.
2026 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
2027 matchStart = usearch_next(searcher, &status);
2028 ASSERT(status == U_ZERO_ERROR);
2029 goto nextMatch;
2030 }
2031
2032 size_t newSize = size - (matchStart + 1);
2033 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
2034 m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
2035 m_buffer.shrink(newSize);
2036
2037 start = size - matchStart;
2038 return matchedLength;
2039 }
2040
2041 // --------
2042
rangeLength(const Range * r,bool forSelectionPreservation)2043 int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
2044 {
2045 int length = 0;
2046 TextIteratorBehaviorFlags behaviorFlags = TextIteratorEmitsObjectReplacementCharacter;
2047 if (forSelectionPreservation)
2048 behaviorFlags |= TextIteratorEmitsCharactersBetweenAllVisiblePositions;
2049 for (TextIterator it(r, behaviorFlags); !it.atEnd(); it.advance())
2050 length += it.length();
2051
2052 return length;
2053 }
2054
subrange(Range * entireRange,int characterOffset,int characterCount)2055 PassRefPtrWillBeRawPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
2056 {
2057 CharacterIterator entireRangeIterator(entireRange);
2058 Position start;
2059 Position end;
2060 calculateCharacterSubrange(entireRangeIterator, characterOffset, characterCount, start, end);
2061 return Range::create(entireRange->ownerDocument(), start, end);
2062 }
2063
2064 // --------
2065
plainText(const Range * r,TextIteratorBehaviorFlags behavior)2066 String plainText(const Range* r, TextIteratorBehaviorFlags behavior)
2067 {
2068 // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
2069 static const unsigned initialCapacity = 1 << 15;
2070
2071 unsigned bufferLength = 0;
2072 StringBuilder builder;
2073 builder.reserveCapacity(initialCapacity);
2074
2075 for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) {
2076 it.appendTextToStringBuilder(builder);
2077 bufferLength += it.length();
2078 }
2079
2080 if (!bufferLength)
2081 return emptyString();
2082
2083 return builder.toString();
2084 }
2085
collapsedToBoundary(const Range * range,bool forward)2086 static PassRefPtrWillBeRawPtr<Range> collapsedToBoundary(const Range* range, bool forward)
2087 {
2088 RefPtrWillBeRawPtr<Range> result = range->cloneRange();
2089 result->collapse(!forward);
2090 return result.release();
2091 }
2092
2093 // Check if there's any unpaird surrogate code point.
2094 // Non-character code points are not checked.
isValidUTF16(const String & s)2095 static bool isValidUTF16(const String& s)
2096 {
2097 if (s.is8Bit())
2098 return true;
2099 const UChar* ustr = s.characters16();
2100 size_t length = s.length();
2101 size_t position = 0;
2102 while (position < length) {
2103 UChar32 character;
2104 U16_NEXT(ustr, position, length, character);
2105 if (U_IS_SURROGATE(character))
2106 return false;
2107 }
2108 return true;
2109 }
2110
findPlainTextInternal(CharacterIterator & it,const String & target,FindOptions options,size_t & matchStart)2111 static size_t findPlainTextInternal(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
2112 {
2113 matchStart = 0;
2114 size_t matchLength = 0;
2115
2116 if (!isValidUTF16(target))
2117 return 0;
2118
2119 SearchBuffer buffer(target, options);
2120
2121 if (buffer.needsMoreContext()) {
2122 RefPtrWillBeRawPtr<Range> startRange = it.range();
2123 RefPtrWillBeRawPtr<Range> beforeStartRange = startRange->ownerDocument().createRange();
2124 beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION);
2125 for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
2126 Vector<UChar, 1024> characters;
2127 backwardsIterator.prependTextTo(characters);
2128 buffer.prependContext(characters.data(), characters.size());
2129 if (!buffer.needsMoreContext())
2130 break;
2131 }
2132 }
2133
2134 while (!it.atEnd()) {
2135 it.appendTextTo(buffer);
2136 it.advance(buffer.numberOfCharactersJustAppended());
2137 tryAgain:
2138 size_t matchStartOffset;
2139 if (size_t newMatchLength = buffer.search(matchStartOffset)) {
2140 // Note that we found a match, and where we found it.
2141 size_t lastCharacterInBufferOffset = it.characterOffset();
2142 ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
2143 matchStart = lastCharacterInBufferOffset - matchStartOffset;
2144 matchLength = newMatchLength;
2145 // If searching forward, stop on the first match.
2146 // If searching backward, don't stop, so we end up with the last match.
2147 if (!(options & Backwards))
2148 break;
2149 goto tryAgain;
2150 }
2151 if (it.atBreak() && !buffer.atBreak()) {
2152 buffer.reachedBreak();
2153 goto tryAgain;
2154 }
2155 }
2156
2157 return matchLength;
2158 }
2159
2160 static const TextIteratorBehaviorFlags iteratorFlagsForFindPlainText = TextIteratorEntersTextControls | TextIteratorEntersAuthorShadowRoots;
2161
findPlainText(const Range * range,const String & target,FindOptions options)2162 PassRefPtrWillBeRawPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
2163 {
2164 // CharacterIterator requires renderers to be up-to-date
2165 range->ownerDocument().updateLayout();
2166
2167 // First, find the text.
2168 size_t matchStart;
2169 size_t matchLength;
2170 {
2171 CharacterIterator findIterator(range, iteratorFlagsForFindPlainText);
2172 matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
2173 if (!matchLength)
2174 return collapsedToBoundary(range, !(options & Backwards));
2175 }
2176
2177 // Then, find the document position of the start and the end of the text.
2178 CharacterIterator computeRangeIterator(range, iteratorFlagsForFindPlainText);
2179 Position resultStart;
2180 Position resultEnd;
2181 calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
2182 return Range::create(range->ownerDocument(), resultStart, resultEnd);
2183 }
2184
findPlainText(const Position & inputStart,const Position & inputEnd,const String & target,FindOptions options,Position & resultStart,Position & resultEnd)2185 void findPlainText(const Position& inputStart, const Position& inputEnd, const String& target, FindOptions options, Position& resultStart, Position& resultEnd)
2186 {
2187 resultStart.clear();
2188 resultEnd.clear();
2189 // CharacterIterator requires renderers to be up-to-date.
2190 if (!inputStart.inDocument())
2191 return;
2192 ASSERT(inputStart.document() == inputEnd.document());
2193 inputStart.document()->updateLayout();
2194
2195 // FIXME: Reduce the code duplication with above (but how?).
2196 size_t matchStart;
2197 size_t matchLength;
2198 {
2199 CharacterIterator findIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
2200 matchLength = findPlainTextInternal(findIterator, target, options, matchStart);
2201 if (!matchLength) {
2202 const Position& collapseTo = options & Backwards ? inputStart : inputEnd;
2203 resultStart = collapseTo;
2204 resultEnd = collapseTo;
2205 return;
2206 }
2207 }
2208
2209 CharacterIterator computeRangeIterator(inputStart, inputEnd, iteratorFlagsForFindPlainText);
2210 calculateCharacterSubrange(computeRangeIterator, matchStart, matchLength, resultStart, resultEnd);
2211 }
2212
2213 }
2214