1 /*
2 * Copyright (C) 2013 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "config.h"
32 #include "core/frame/SmartClip.h"
33
34 #include "core/dom/ContainerNode.h"
35 #include "core/dom/Document.h"
36 #include "core/dom/NodeTraversal.h"
37 #include "core/frame/DOMWindow.h"
38 #include "core/frame/FrameView.h"
39 #include "core/html/HTMLFrameOwnerElement.h"
40 #include "core/page/Page.h"
41 #include "wtf/text/StringBuilder.h"
42
43 namespace WebCore {
44
// Returns |rect| scaled by |scale|. A dimension that was positive in |rect|
// but became zero after scaling is bumped back up to 1.
static IntRect applyScaleWithoutCollapsingToZero(const IntRect& rect, float scale)
{
    IntRect scaled(rect);
    scaled.scale(scale);

    const bool widthCollapsed = rect.width() > 0 && !scaled.width();
    if (widthCollapsed)
        scaled.setWidth(1);

    const bool heightCollapsed = rect.height() > 0 && !scaled.height();
    if (heightCollapsed)
        scaled.setHeight(1);

    return scaled;
}
55
nodeInsideFrame(Node * node)56 static Node* nodeInsideFrame(Node* node)
57 {
58 if (node->isFrameOwnerElement())
59 return toHTMLFrameOwnerElement(node)->contentDocument();
60 return 0;
61 }
62
63 // FIXME: SmartClipData is eventually returned via
64 // SLookSmartClip.DataExtractionListener:
65 // http://img-developer.samsung.com/onlinedocs/sms/com/samsung/android/sdk/look/...
66 // however the original author of this change chose to use a string-serialization
67 // format (presumably to make IPC easy?).
68 // If we're going to use this as a Pickle format, we should at least have the
69 // read/write code in one place!
toString()70 String SmartClipData::toString()
71 {
72 if (!m_node)
73 return emptyString();
74
75 const UChar fieldSeparator = 0xFFFE;
76 const UChar rowSeparator = 0xFFFF;
77
78 StringBuilder result;
79 result.append(String::number(m_rect.x()));
80 result.append(fieldSeparator);
81 result.append(String::number(m_rect.y()));
82 result.append(fieldSeparator);
83 result.append(String::number(m_rect.width()));
84 result.append(fieldSeparator);
85 result.append(String::number(m_rect.height()));
86 result.append(fieldSeparator);
87 result.append(m_string);
88 result.append(rowSeparator);
89 return result.toString();
90 }
91
SmartClip(PassRefPtr<Frame> frame)92 SmartClip::SmartClip(PassRefPtr<Frame> frame)
93 : m_frame(frame)
94 {
95 }
96
dataForRect(const IntRect & cropRect)97 SmartClipData SmartClip::dataForRect(const IntRect& cropRect)
98 {
99 IntRect resizedCropRect = applyScaleWithoutCollapsingToZero(cropRect, 1 / pageScaleFactor());
100
101 Node* bestNode = findBestOverlappingNode(m_frame->document(), resizedCropRect);
102 if (!bestNode)
103 return SmartClipData();
104
105 if (Node* nodeFromFrame = nodeInsideFrame(bestNode)) {
106 // FIXME: This code only hit-tests a single iframe. It seems like we ought support nested frames.
107 if (Node* bestNodeInFrame = findBestOverlappingNode(nodeFromFrame, resizedCropRect))
108 bestNode = bestNodeInFrame;
109 }
110
111 Vector<Node*> hitNodes;
112 collectOverlappingChildNodes(bestNode, resizedCropRect, hitNodes);
113
114 if (hitNodes.isEmpty() || hitNodes.size() == bestNode->childNodeCount()) {
115 hitNodes.clear();
116 hitNodes.append(bestNode);
117 }
118
119 // Unite won't work with the empty rect, so we initialize to the first rect.
120 IntRect unitedRects = hitNodes[0]->pixelSnappedBoundingBox();
121 StringBuilder collectedText;
122 for (size_t i = 0; i < hitNodes.size(); ++i) {
123 collectedText.append(extractTextFromNode(hitNodes[i]));
124 unitedRects.unite(hitNodes[i]->pixelSnappedBoundingBox());
125 }
126
127 return SmartClipData(bestNode, convertRectToWindow(unitedRects), collectedText.toString());
128 }
129
pageScaleFactor()130 float SmartClip::pageScaleFactor()
131 {
132 return m_frame->page()->pageScaleFactor();
133 }
134
135 // This function is a bit of a mystery. If you understand what it does, please
136 // consider adding a more descriptive name.
minNodeContainsNodes(Node * minNode,Node * newNode)137 Node* SmartClip::minNodeContainsNodes(Node* minNode, Node* newNode)
138 {
139 if (!newNode)
140 return minNode;
141 if (!minNode)
142 return newNode;
143
144 IntRect minNodeRect = minNode->pixelSnappedBoundingBox();
145 IntRect newNodeRect = newNode->pixelSnappedBoundingBox();
146
147 Node* parentMinNode = minNode->parentNode();
148 Node* parentNewNode = newNode->parentNode();
149
150 if (minNodeRect.contains(newNodeRect)) {
151 if (parentMinNode && parentNewNode && parentNewNode->parentNode() == parentMinNode)
152 return parentMinNode;
153 return minNode;
154 }
155
156 if (newNodeRect.contains(minNodeRect)) {
157 if (parentMinNode && parentNewNode && parentMinNode->parentNode() == parentNewNode)
158 return parentNewNode;
159 return newNode;
160 }
161
162 // This loop appears to find the nearest ancestor of minNode (in DOM order)
163 // that contains the newNodeRect. It's very unclear to me why that's an
164 // interesting node to find. Presumably this loop will often just return
165 // the documentElement.
166 Node* node = minNode;
167 while (node) {
168 if (node->renderer()) {
169 IntRect nodeRect = node->pixelSnappedBoundingBox();
170 if (nodeRect.contains(newNodeRect)) {
171 return node;
172 }
173 }
174 node = node->parentNode();
175 }
176
177 return 0;
178 }
179
findBestOverlappingNode(Node * rootNode,const IntRect & cropRect)180 Node* SmartClip::findBestOverlappingNode(Node* rootNode, const IntRect& cropRect)
181 {
182 if (!rootNode)
183 return 0;
184
185 IntRect resizedCropRect = rootNode->document().view()->windowToContents(cropRect);
186
187 Node* node = rootNode;
188 Node* minNode = 0;
189
190 while (node) {
191 IntRect nodeRect = node->pixelSnappedBoundingBox();
192
193 if (node->isElementNode() && equalIgnoringCase(toElement(node)->fastGetAttribute(HTMLNames::aria_hiddenAttr), "true")) {
194 node = NodeTraversal::nextSkippingChildren(*node, rootNode);
195 continue;
196 }
197
198 RenderObject* renderer = node->renderer();
199 if (renderer && !nodeRect.isEmpty()) {
200 if (renderer->isText()
201 || renderer->isRenderImage()
202 || node->isFrameOwnerElement()
203 || (renderer->style()->hasBackgroundImage() && !shouldSkipBackgroundImage(node))) {
204 if (resizedCropRect.intersects(nodeRect)) {
205 minNode = minNodeContainsNodes(minNode, node);
206 } else {
207 node = NodeTraversal::nextSkippingChildren(*node, rootNode);
208 continue;
209 }
210 }
211 }
212 node = NodeTraversal::next(*node, rootNode);
213 }
214
215 return minNode;
216 }
217
218 // This function appears to heuristically guess whether to include a background
219 // image in the smart clip. It seems to want to include sprites created from
220 // CSS background images but to skip actual backgrounds.
shouldSkipBackgroundImage(Node * node)221 bool SmartClip::shouldSkipBackgroundImage(Node* node)
222 {
223 // Apparently we're only interested in background images on spans and divs.
224 if (!node->hasTagName(HTMLNames::spanTag) && !node->hasTagName(HTMLNames::divTag))
225 return true;
226
227 // This check actually makes a bit of sense. If you're going to sprite an
228 // image out of a CSS background, you're probably going to specify a height
229 // or a width. On the other hand, if we've got a legit background image,
230 // it's very likely the height or the width will be set to auto.
231 RenderObject* renderer = node->renderer();
232 if (renderer && (renderer->style()->logicalHeight().isAuto() || renderer->style()->logicalWidth().isAuto()))
233 return true;
234
235 return false;
236 }
237
collectOverlappingChildNodes(Node * parentNode,const IntRect & cropRect,Vector<Node * > & hitNodes)238 void SmartClip::collectOverlappingChildNodes(Node* parentNode, const IntRect& cropRect, Vector<Node*>& hitNodes)
239 {
240 if (!parentNode)
241 return;
242 IntRect resizedCropRect = parentNode->document().view()->windowToContents(cropRect);
243 for (Node* child = parentNode->firstChild(); child; child = child->nextSibling()) {
244 IntRect childRect = child->pixelSnappedBoundingBox();
245 if (resizedCropRect.intersects(childRect))
246 hitNodes.append(child);
247 }
248 }
249
// Converts |nodeRect| from the contents coordinates of m_frame's document
// view to window coordinates, then scales it by the page scale factor.
IntRect SmartClip::convertRectToWindow(const IntRect& nodeRect)
{
    FrameView* view = m_frame->document()->view();
    IntRect windowRect = view->contentsToWindow(nodeRect);
    windowRect.scale(pageScaleFactor());
    return windowRect;
}
256
extractTextFromNode(Node * node)257 String SmartClip::extractTextFromNode(Node* node)
258 {
259 // Science has proven that no text nodes are ever positioned at y == -99999.
260 int prevYPos = -99999;
261
262 StringBuilder result;
263 for (Node* currentNode = node; currentNode; currentNode = NodeTraversal::next(*currentNode, node)) {
264 RenderStyle* style = currentNode->computedStyle();
265 if (style && style->userSelect() == SELECT_NONE)
266 continue;
267
268 if (Node* nodeFromFrame = nodeInsideFrame(currentNode))
269 result.append(extractTextFromNode(nodeFromFrame));
270
271 IntRect nodeRect = currentNode->pixelSnappedBoundingBox();
272 if (currentNode->renderer() && !nodeRect.isEmpty()) {
273 if (currentNode->isTextNode()) {
274 String nodeValue = currentNode->nodeValue();
275
276 // It's unclear why we blacklist solitary "\n" node values.
277 // Maybe we're trying to ignore <br> tags somehow?
278 if (nodeValue == "\n")
279 nodeValue = "";
280
281 if (nodeRect.y() != prevYPos) {
282 prevYPos = nodeRect.y();
283 result.append('\n');
284 }
285
286 result.append(nodeValue);
287 }
288 }
289 }
290
291 return result.toString();
292 }
293
294 } // namespace WebCore
295