• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "config.h"
32 #include "core/frame/SmartClip.h"
33 
34 #include "core/dom/ContainerNode.h"
35 #include "core/dom/Document.h"
36 #include "core/dom/NodeTraversal.h"
37 #include "core/frame/DOMWindow.h"
38 #include "core/frame/FrameView.h"
39 #include "core/html/HTMLFrameOwnerElement.h"
40 #include "core/page/Page.h"
41 #include "wtf/text/StringBuilder.h"
42 
43 namespace WebCore {
44 
// Returns a copy of |rect| scaled by |scale|. If a dimension that was
// positive in |rect| comes out as zero after scaling, it is set to 1 so the
// result does not collapse along that axis.
static IntRect applyScaleWithoutCollapsingToZero(const IntRect& rect, float scale)
{
    IntRect scaled(rect);
    scaled.scale(scale);

    const bool widthCollapsed = rect.width() > 0 && !scaled.width();
    if (widthCollapsed)
        scaled.setWidth(1);

    const bool heightCollapsed = rect.height() > 0 && !scaled.height();
    if (heightCollapsed)
        scaled.setHeight(1);

    return scaled;
}
55 
nodeInsideFrame(Node * node)56 static Node* nodeInsideFrame(Node* node)
57 {
58     if (node->isFrameOwnerElement())
59         return toHTMLFrameOwnerElement(node)->contentDocument();
60     return 0;
61 }
62 
63 // FIXME: SmartClipData is eventually returned via
64 // SLookSmartClip.DataExtractionListener:
65 // http://img-developer.samsung.com/onlinedocs/sms/com/samsung/android/sdk/look/...
66 // however the original author of this change chose to use a string-serialization
67 // format (presumably to make IPC easy?).
68 // If we're going to use this as a Pickle format, we should at least have the
69 // read/write code in one place!
toString()70 String SmartClipData::toString()
71 {
72     if (!m_node)
73         return emptyString();
74 
75     const UChar fieldSeparator = 0xFFFE;
76     const UChar rowSeparator = 0xFFFF;
77 
78     StringBuilder result;
79     result.append(String::number(m_rect.x()));
80     result.append(fieldSeparator);
81     result.append(String::number(m_rect.y()));
82     result.append(fieldSeparator);
83     result.append(String::number(m_rect.width()));
84     result.append(fieldSeparator);
85     result.append(String::number(m_rect.height()));
86     result.append(fieldSeparator);
87     result.append(m_string);
88     result.append(rowSeparator);
89     return result.toString();
90 }
91 
SmartClip(PassRefPtr<Frame> frame)92 SmartClip::SmartClip(PassRefPtr<Frame> frame)
93     : m_frame(frame)
94 {
95 }
96 
dataForRect(const IntRect & cropRect)97 SmartClipData SmartClip::dataForRect(const IntRect& cropRect)
98 {
99     IntRect resizedCropRect = applyScaleWithoutCollapsingToZero(cropRect, 1 / pageScaleFactor());
100 
101     Node* bestNode = findBestOverlappingNode(m_frame->document(), resizedCropRect);
102     if (!bestNode)
103         return SmartClipData();
104 
105     if (Node* nodeFromFrame = nodeInsideFrame(bestNode)) {
106         // FIXME: This code only hit-tests a single iframe. It seems like we ought support nested frames.
107         if (Node* bestNodeInFrame = findBestOverlappingNode(nodeFromFrame, resizedCropRect))
108             bestNode = bestNodeInFrame;
109     }
110 
111     Vector<Node*> hitNodes;
112     collectOverlappingChildNodes(bestNode, resizedCropRect, hitNodes);
113 
114     if (hitNodes.isEmpty() || hitNodes.size() == bestNode->childNodeCount()) {
115         hitNodes.clear();
116         hitNodes.append(bestNode);
117     }
118 
119     // Unite won't work with the empty rect, so we initialize to the first rect.
120     IntRect unitedRects = hitNodes[0]->pixelSnappedBoundingBox();
121     StringBuilder collectedText;
122     for (size_t i = 0; i < hitNodes.size(); ++i) {
123         collectedText.append(extractTextFromNode(hitNodes[i]));
124         unitedRects.unite(hitNodes[i]->pixelSnappedBoundingBox());
125     }
126 
127     return SmartClipData(bestNode, convertRectToWindow(unitedRects), collectedText.toString());
128 }
129 
pageScaleFactor()130 float SmartClip::pageScaleFactor()
131 {
132     return m_frame->page()->pageScaleFactor();
133 }
134 
135 // This function is a bit of a mystery. If you understand what it does, please
136 // consider adding a more descriptive name.
minNodeContainsNodes(Node * minNode,Node * newNode)137 Node* SmartClip::minNodeContainsNodes(Node* minNode, Node* newNode)
138 {
139     if (!newNode)
140         return minNode;
141     if (!minNode)
142         return newNode;
143 
144     IntRect minNodeRect = minNode->pixelSnappedBoundingBox();
145     IntRect newNodeRect = newNode->pixelSnappedBoundingBox();
146 
147     Node* parentMinNode = minNode->parentNode();
148     Node* parentNewNode = newNode->parentNode();
149 
150     if (minNodeRect.contains(newNodeRect)) {
151         if (parentMinNode && parentNewNode && parentNewNode->parentNode() == parentMinNode)
152             return parentMinNode;
153         return minNode;
154     }
155 
156     if (newNodeRect.contains(minNodeRect)) {
157         if (parentMinNode && parentNewNode && parentMinNode->parentNode() == parentNewNode)
158             return parentNewNode;
159         return newNode;
160     }
161 
162     // This loop appears to find the nearest ancestor of minNode (in DOM order)
163     // that contains the newNodeRect. It's very unclear to me why that's an
164     // interesting node to find. Presumably this loop will often just return
165     // the documentElement.
166     Node* node = minNode;
167     while (node) {
168         if (node->renderer()) {
169             IntRect nodeRect = node->pixelSnappedBoundingBox();
170             if (nodeRect.contains(newNodeRect)) {
171                 return node;
172             }
173         }
174         node = node->parentNode();
175     }
176 
177     return 0;
178 }
179 
findBestOverlappingNode(Node * rootNode,const IntRect & cropRect)180 Node* SmartClip::findBestOverlappingNode(Node* rootNode, const IntRect& cropRect)
181 {
182     if (!rootNode)
183         return 0;
184 
185     IntRect resizedCropRect = rootNode->document().view()->windowToContents(cropRect);
186 
187     Node* node = rootNode;
188     Node* minNode = 0;
189 
190     while (node) {
191         IntRect nodeRect = node->pixelSnappedBoundingBox();
192 
193         if (node->isElementNode() && equalIgnoringCase(toElement(node)->fastGetAttribute(HTMLNames::aria_hiddenAttr), "true")) {
194             node = NodeTraversal::nextSkippingChildren(*node, rootNode);
195             continue;
196         }
197 
198         RenderObject* renderer = node->renderer();
199         if (renderer && !nodeRect.isEmpty()) {
200             if (renderer->isText()
201                 || renderer->isRenderImage()
202                 || node->isFrameOwnerElement()
203                 || (renderer->style()->hasBackgroundImage() && !shouldSkipBackgroundImage(node))) {
204                 if (resizedCropRect.intersects(nodeRect)) {
205                     minNode = minNodeContainsNodes(minNode, node);
206                 } else {
207                     node = NodeTraversal::nextSkippingChildren(*node, rootNode);
208                     continue;
209                 }
210             }
211         }
212         node = NodeTraversal::next(*node, rootNode);
213     }
214 
215     return minNode;
216 }
217 
218 // This function appears to heuristically guess whether to include a background
219 // image in the smart clip. It seems to want to include sprites created from
220 // CSS background images but to skip actual backgrounds.
shouldSkipBackgroundImage(Node * node)221 bool SmartClip::shouldSkipBackgroundImage(Node* node)
222 {
223     // Apparently we're only interested in background images on spans and divs.
224     if (!node->hasTagName(HTMLNames::spanTag) && !node->hasTagName(HTMLNames::divTag))
225         return true;
226 
227     // This check actually makes a bit of sense. If you're going to sprite an
228     // image out of a CSS background, you're probably going to specify a height
229     // or a width. On the other hand, if we've got a legit background image,
230     // it's very likely the height or the width will be set to auto.
231     RenderObject* renderer = node->renderer();
232     if (renderer && (renderer->style()->logicalHeight().isAuto() || renderer->style()->logicalWidth().isAuto()))
233         return true;
234 
235     return false;
236 }
237 
collectOverlappingChildNodes(Node * parentNode,const IntRect & cropRect,Vector<Node * > & hitNodes)238 void SmartClip::collectOverlappingChildNodes(Node* parentNode, const IntRect& cropRect, Vector<Node*>& hitNodes)
239 {
240     if (!parentNode)
241         return;
242     IntRect resizedCropRect = parentNode->document().view()->windowToContents(cropRect);
243     for (Node* child = parentNode->firstChild(); child; child = child->nextSibling()) {
244         IntRect childRect = child->pixelSnappedBoundingBox();
245         if (resizedCropRect.intersects(childRect))
246             hitNodes.append(child);
247     }
248 }
249 
// Converts |nodeRect| with contentsToWindow() on the view of m_frame's
// document, then scales the result by the page scale factor.
IntRect SmartClip::convertRectToWindow(const IntRect& nodeRect)
{
    FrameView* view = m_frame->document()->view();
    IntRect windowRect = view->contentsToWindow(nodeRect);
    windowRect.scale(pageScaleFactor());
    return windowRect;
}
256 
extractTextFromNode(Node * node)257 String SmartClip::extractTextFromNode(Node* node)
258 {
259     // Science has proven that no text nodes are ever positioned at y == -99999.
260     int prevYPos = -99999;
261 
262     StringBuilder result;
263     for (Node* currentNode = node; currentNode; currentNode = NodeTraversal::next(*currentNode, node)) {
264         RenderStyle* style = currentNode->computedStyle();
265         if (style && style->userSelect() == SELECT_NONE)
266             continue;
267 
268         if (Node* nodeFromFrame = nodeInsideFrame(currentNode))
269             result.append(extractTextFromNode(nodeFromFrame));
270 
271         IntRect nodeRect = currentNode->pixelSnappedBoundingBox();
272         if (currentNode->renderer() && !nodeRect.isEmpty()) {
273             if (currentNode->isTextNode()) {
274                 String nodeValue = currentNode->nodeValue();
275 
276                 // It's unclear why we blacklist solitary "\n" node values.
277                 // Maybe we're trying to ignore <br> tags somehow?
278                 if (nodeValue == "\n")
279                     nodeValue = "";
280 
281                 if (nodeRect.y() != prevYPos) {
282                     prevYPos = nodeRect.y();
283                     result.append('\n');
284                 }
285 
286                 result.append(nodeValue);
287             }
288         }
289     }
290 
291     return result.toString();
292 }
293 
294 } // namespace WebCore
295