1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/content/renderer/form_autofill_util.h"
6
7 #include <map>
8
9 #include "base/command_line.h"
10 #include "base/logging.h"
11 #include "base/memory/scoped_vector.h"
12 #include "base/metrics/field_trial.h"
13 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "components/autofill/core/common/autofill_data_validation.h"
16 #include "components/autofill/core/common/autofill_switches.h"
17 #include "components/autofill/core/common/form_data.h"
18 #include "components/autofill/core/common/form_field_data.h"
19 #include "components/autofill/core/common/web_element_descriptor.h"
20 #include "third_party/WebKit/public/platform/WebString.h"
21 #include "third_party/WebKit/public/platform/WebVector.h"
22 #include "third_party/WebKit/public/web/WebDocument.h"
23 #include "third_party/WebKit/public/web/WebElement.h"
24 #include "third_party/WebKit/public/web/WebElementCollection.h"
25 #include "third_party/WebKit/public/web/WebExceptionCode.h"
26 #include "third_party/WebKit/public/web/WebFormControlElement.h"
27 #include "third_party/WebKit/public/web/WebFormElement.h"
28 #include "third_party/WebKit/public/web/WebInputElement.h"
29 #include "third_party/WebKit/public/web/WebLabelElement.h"
30 #include "third_party/WebKit/public/web/WebLocalFrame.h"
31 #include "third_party/WebKit/public/web/WebNode.h"
32 #include "third_party/WebKit/public/web/WebNodeList.h"
33 #include "third_party/WebKit/public/web/WebOptionElement.h"
34 #include "third_party/WebKit/public/web/WebSelectElement.h"
35 #include "third_party/WebKit/public/web/WebTextAreaElement.h"
36
37 using blink::WebDocument;
38 using blink::WebElement;
39 using blink::WebElementCollection;
40 using blink::WebExceptionCode;
41 using blink::WebFormControlElement;
42 using blink::WebFormElement;
43 using blink::WebFrame;
44 using blink::WebInputElement;
45 using blink::WebLabelElement;
46 using blink::WebNode;
47 using blink::WebNodeList;
48 using blink::WebOptionElement;
49 using blink::WebSelectElement;
50 using blink::WebTextAreaElement;
51 using blink::WebString;
52 using blink::WebVector;
53
54 namespace autofill {
55 namespace {
56
IsOptionElement(const WebElement & element)57 bool IsOptionElement(const WebElement& element) {
58 CR_DEFINE_STATIC_LOCAL(WebString, kOption, ("option"));
59 return element.hasHTMLTagName(kOption);
60 }
61
IsScriptElement(const WebElement & element)62 bool IsScriptElement(const WebElement& element) {
63 CR_DEFINE_STATIC_LOCAL(WebString, kScript, ("script"));
64 return element.hasHTMLTagName(kScript);
65 }
66
IsNoScriptElement(const WebElement & element)67 bool IsNoScriptElement(const WebElement& element) {
68 CR_DEFINE_STATIC_LOCAL(WebString, kNoScript, ("noscript"));
69 return element.hasHTMLTagName(kNoScript);
70 }
71
HasTagName(const WebNode & node,const blink::WebString & tag)72 bool HasTagName(const WebNode& node, const blink::WebString& tag) {
73 return node.isElementNode() && node.toConst<WebElement>().hasHTMLTagName(tag);
74 }
75
IsAutofillableElement(const WebFormControlElement & element)76 bool IsAutofillableElement(const WebFormControlElement& element) {
77 const WebInputElement* input_element = toWebInputElement(&element);
78 return IsAutofillableInputElement(input_element) ||
79 IsSelectElement(element) ||
80 IsTextAreaElement(element);
81 }
82
83 // Check whether the given field satisfies the REQUIRE_AUTOCOMPLETE requirement.
SatisfiesRequireAutocomplete(const WebInputElement & input_element)84 bool SatisfiesRequireAutocomplete(const WebInputElement& input_element) {
85 return input_element.autoComplete();
86 }
87
88 // Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
89 // to a single space. If |force_whitespace| is true, then the resulting string
90 // is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the
91 // result includes a space only if |prefix| has trailing whitespace or |suffix|
92 // has leading whitespace.
93 // A few examples:
94 // * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar"
95 // * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar"
96 // * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar"
97 // * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar"
98 // * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar"
99 // * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar"
100 // * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar "
101 // * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar "
CombineAndCollapseWhitespace(const base::string16 & prefix,const base::string16 & suffix,bool force_whitespace)102 const base::string16 CombineAndCollapseWhitespace(
103 const base::string16& prefix,
104 const base::string16& suffix,
105 bool force_whitespace) {
106 base::string16 prefix_trimmed;
107 base::TrimPositions prefix_trailing_whitespace =
108 base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed);
109
110 // Recursively compute the children's text.
111 base::string16 suffix_trimmed;
112 base::TrimPositions suffix_leading_whitespace =
113 base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);
114
115 if (prefix_trailing_whitespace || suffix_leading_whitespace ||
116 force_whitespace) {
117 return prefix_trimmed + base::ASCIIToUTF16(" ") + suffix_trimmed;
118 } else {
119 return prefix_trimmed + suffix_trimmed;
120 }
121 }
122
123 // This is a helper function for the FindChildText() function (see below).
124 // Search depth is limited with the |depth| parameter.
FindChildTextInner(const WebNode & node,int depth)125 base::string16 FindChildTextInner(const WebNode& node, int depth) {
126 if (depth <= 0 || node.isNull())
127 return base::string16();
128
129 // Skip over comments.
130 if (node.nodeType() == WebNode::CommentNode)
131 return FindChildTextInner(node.nextSibling(), depth - 1);
132
133 if (node.nodeType() != WebNode::ElementNode &&
134 node.nodeType() != WebNode::TextNode)
135 return base::string16();
136
137 // Ignore elements known not to contain inferable labels.
138 if (node.isElementNode()) {
139 const WebElement element = node.toConst<WebElement>();
140 if (IsOptionElement(element) ||
141 IsScriptElement(element) ||
142 IsNoScriptElement(element) ||
143 (element.isFormControlElement() &&
144 IsAutofillableElement(element.toConst<WebFormControlElement>()))) {
145 return base::string16();
146 }
147 }
148
149 // Extract the text exactly at this node.
150 base::string16 node_text = node.nodeValue();
151
152 // Recursively compute the children's text.
153 // Preserve inter-element whitespace separation.
154 base::string16 child_text = FindChildTextInner(node.firstChild(), depth - 1);
155 bool add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
156 node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
157
158 // Recursively compute the siblings' text.
159 // Again, preserve inter-element whitespace separation.
160 base::string16 sibling_text =
161 FindChildTextInner(node.nextSibling(), depth - 1);
162 add_space = node.nodeType() == WebNode::TextNode && node_text.empty();
163 node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
164
165 return node_text;
166 }
167
168 // Returns the aggregated values of the descendants of |element| that are
169 // non-empty text nodes. This is a faster alternative to |innerText()| for
170 // performance critical operations. It does a full depth-first search so can be
171 // used when the structure is not directly known. However, unlike with
172 // |innerText()|, the search depth and breadth are limited to a fixed threshold.
173 // Whitespace is trimmed from text accumulated at descendant nodes.
FindChildText(const WebNode & node)174 base::string16 FindChildText(const WebNode& node) {
175 if (node.isTextNode())
176 return node.nodeValue();
177
178 WebNode child = node.firstChild();
179
180 const int kChildSearchDepth = 10;
181 base::string16 node_text = FindChildTextInner(child, kChildSearchDepth);
182 base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
183 return node_text;
184 }
185
186 // Helper for |InferLabelForElement()| that infers a label, if possible, from
187 // a previous sibling of |element|,
188 // e.g. Some Text <input ...>
189 // or Some <span>Text</span> <input ...>
190 // or <p>Some Text</p><input ...>
191 // or <label>Some Text</label> <input ...>
192 // or Some Text <img><input ...>
193 // or <b>Some Text</b><br/> <input ...>.
InferLabelFromPrevious(const WebFormControlElement & element)194 base::string16 InferLabelFromPrevious(const WebFormControlElement& element) {
195 base::string16 inferred_label;
196 WebNode previous = element;
197 while (true) {
198 previous = previous.previousSibling();
199 if (previous.isNull())
200 break;
201
202 // Skip over comments.
203 WebNode::NodeType node_type = previous.nodeType();
204 if (node_type == WebNode::CommentNode)
205 continue;
206
207 // Otherwise, only consider normal HTML elements and their contents.
208 if (node_type != WebNode::TextNode &&
209 node_type != WebNode::ElementNode)
210 break;
211
212 // A label might be split across multiple "lightweight" nodes.
213 // Coalesce any text contained in multiple consecutive
214 // (a) plain text nodes or
215 // (b) inline HTML elements that are essentially equivalent to text nodes.
216 CR_DEFINE_STATIC_LOCAL(WebString, kBold, ("b"));
217 CR_DEFINE_STATIC_LOCAL(WebString, kStrong, ("strong"));
218 CR_DEFINE_STATIC_LOCAL(WebString, kSpan, ("span"));
219 CR_DEFINE_STATIC_LOCAL(WebString, kFont, ("font"));
220 if (previous.isTextNode() ||
221 HasTagName(previous, kBold) || HasTagName(previous, kStrong) ||
222 HasTagName(previous, kSpan) || HasTagName(previous, kFont)) {
223 base::string16 value = FindChildText(previous);
224 // A text node's value will be empty if it is for a line break.
225 bool add_space = previous.isTextNode() && value.empty();
226 inferred_label =
227 CombineAndCollapseWhitespace(value, inferred_label, add_space);
228 continue;
229 }
230
231 // If we have identified a partial label and have reached a non-lightweight
232 // element, consider the label to be complete.
233 base::string16 trimmed_label;
234 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &trimmed_label);
235 if (!trimmed_label.empty())
236 break;
237
238 // <img> and <br> tags often appear between the input element and its
239 // label text, so skip over them.
240 CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("img"));
241 CR_DEFINE_STATIC_LOCAL(WebString, kBreak, ("br"));
242 if (HasTagName(previous, kImage) || HasTagName(previous, kBreak))
243 continue;
244
245 // We only expect <p> and <label> tags to contain the full label text.
246 CR_DEFINE_STATIC_LOCAL(WebString, kPage, ("p"));
247 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
248 if (HasTagName(previous, kPage) || HasTagName(previous, kLabel))
249 inferred_label = FindChildText(previous);
250
251 break;
252 }
253
254 base::TrimWhitespace(inferred_label, base::TRIM_ALL, &inferred_label);
255 return inferred_label;
256 }
257
258 // Helper for |InferLabelForElement()| that infers a label, if possible, from
259 // enclosing list item,
260 // e.g. <li>Some Text<input ...><input ...><input ...></tr>
InferLabelFromListItem(const WebFormControlElement & element)261 base::string16 InferLabelFromListItem(const WebFormControlElement& element) {
262 WebNode parent = element.parentNode();
263 CR_DEFINE_STATIC_LOCAL(WebString, kListItem, ("li"));
264 while (!parent.isNull() && parent.isElementNode() &&
265 !parent.to<WebElement>().hasHTMLTagName(kListItem)) {
266 parent = parent.parentNode();
267 }
268
269 if (!parent.isNull() && HasTagName(parent, kListItem))
270 return FindChildText(parent);
271
272 return base::string16();
273 }
274
275 // Helper for |InferLabelForElement()| that infers a label, if possible, from
276 // surrounding table structure,
277 // e.g. <tr><td>Some Text</td><td><input ...></td></tr>
278 // or <tr><th>Some Text</th><td><input ...></td></tr>
279 // or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
280 // or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
InferLabelFromTableColumn(const WebFormControlElement & element)281 base::string16 InferLabelFromTableColumn(const WebFormControlElement& element) {
282 CR_DEFINE_STATIC_LOCAL(WebString, kTableCell, ("td"));
283 WebNode parent = element.parentNode();
284 while (!parent.isNull() && parent.isElementNode() &&
285 !parent.to<WebElement>().hasHTMLTagName(kTableCell)) {
286 parent = parent.parentNode();
287 }
288
289 if (parent.isNull())
290 return base::string16();
291
292 // Check all previous siblings, skipping non-element nodes, until we find a
293 // non-empty text block.
294 base::string16 inferred_label;
295 WebNode previous = parent.previousSibling();
296 CR_DEFINE_STATIC_LOCAL(WebString, kTableHeader, ("th"));
297 while (inferred_label.empty() && !previous.isNull()) {
298 if (HasTagName(previous, kTableCell) || HasTagName(previous, kTableHeader))
299 inferred_label = FindChildText(previous);
300
301 previous = previous.previousSibling();
302 }
303
304 return inferred_label;
305 }
306
307 // Helper for |InferLabelForElement()| that infers a label, if possible, from
308 // surrounding table structure,
309 // e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
InferLabelFromTableRow(const WebFormControlElement & element)310 base::string16 InferLabelFromTableRow(const WebFormControlElement& element) {
311 CR_DEFINE_STATIC_LOCAL(WebString, kTableRow, ("tr"));
312 WebNode parent = element.parentNode();
313 while (!parent.isNull() && parent.isElementNode() &&
314 !parent.to<WebElement>().hasHTMLTagName(kTableRow)) {
315 parent = parent.parentNode();
316 }
317
318 if (parent.isNull())
319 return base::string16();
320
321 // Check all previous siblings, skipping non-element nodes, until we find a
322 // non-empty text block.
323 base::string16 inferred_label;
324 WebNode previous = parent.previousSibling();
325 while (inferred_label.empty() && !previous.isNull()) {
326 if (HasTagName(previous, kTableRow))
327 inferred_label = FindChildText(previous);
328
329 previous = previous.previousSibling();
330 }
331
332 return inferred_label;
333 }
334
335 // Helper for |InferLabelForElement()| that infers a label, if possible, from
336 // a surrounding div table,
337 // e.g. <div>Some Text<span><input ...></span></div>
338 // e.g. <div>Some Text</div><div><input ...></div>
InferLabelFromDivTable(const WebFormControlElement & element)339 base::string16 InferLabelFromDivTable(const WebFormControlElement& element) {
340 WebNode node = element.parentNode();
341 bool looking_for_parent = true;
342
343 // Search the sibling and parent <div>s until we find a candidate label.
344 base::string16 inferred_label;
345 CR_DEFINE_STATIC_LOCAL(WebString, kDiv, ("div"));
346 CR_DEFINE_STATIC_LOCAL(WebString, kTable, ("table"));
347 CR_DEFINE_STATIC_LOCAL(WebString, kFieldSet, ("fieldset"));
348 while (inferred_label.empty() && !node.isNull()) {
349 if (HasTagName(node, kDiv)) {
350 looking_for_parent = false;
351 inferred_label = FindChildText(node);
352 } else if (looking_for_parent &&
353 (HasTagName(node, kTable) || HasTagName(node, kFieldSet))) {
354 // If the element is in a table or fieldset, its label most likely is too.
355 break;
356 }
357
358 if (node.previousSibling().isNull()) {
359 // If there are no more siblings, continue walking up the tree.
360 looking_for_parent = true;
361 }
362
363 if (looking_for_parent)
364 node = node.parentNode();
365 else
366 node = node.previousSibling();
367 }
368
369 return inferred_label;
370 }
371
372 // Helper for |InferLabelForElement()| that infers a label, if possible, from
373 // a surrounding definition list,
374 // e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
375 // e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
InferLabelFromDefinitionList(const WebFormControlElement & element)376 base::string16 InferLabelFromDefinitionList(
377 const WebFormControlElement& element) {
378 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionData, ("dd"));
379 WebNode parent = element.parentNode();
380 while (!parent.isNull() && parent.isElementNode() &&
381 !parent.to<WebElement>().hasHTMLTagName(kDefinitionData))
382 parent = parent.parentNode();
383
384 if (parent.isNull() || !HasTagName(parent, kDefinitionData))
385 return base::string16();
386
387 // Skip by any intervening text nodes.
388 WebNode previous = parent.previousSibling();
389 while (!previous.isNull() && previous.isTextNode())
390 previous = previous.previousSibling();
391
392 CR_DEFINE_STATIC_LOCAL(WebString, kDefinitionTag, ("dt"));
393 if (previous.isNull() || !HasTagName(previous, kDefinitionTag))
394 return base::string16();
395
396 return FindChildText(previous);
397 }
398
399 // Infers corresponding label for |element| from surrounding context in the DOM,
400 // e.g. the contents of the preceding <p> tag or text element.
InferLabelForElement(const WebFormControlElement & element)401 base::string16 InferLabelForElement(const WebFormControlElement& element) {
402 base::string16 inferred_label = InferLabelFromPrevious(element);
403 if (!inferred_label.empty())
404 return inferred_label;
405
406 // If we didn't find a label, check for list item case.
407 inferred_label = InferLabelFromListItem(element);
408 if (!inferred_label.empty())
409 return inferred_label;
410
411 // If we didn't find a label, check for table cell case.
412 inferred_label = InferLabelFromTableColumn(element);
413 if (!inferred_label.empty())
414 return inferred_label;
415
416 // If we didn't find a label, check for table row case.
417 inferred_label = InferLabelFromTableRow(element);
418 if (!inferred_label.empty())
419 return inferred_label;
420
421 // If we didn't find a label, check for definition list case.
422 inferred_label = InferLabelFromDefinitionList(element);
423 if (!inferred_label.empty())
424 return inferred_label;
425
426 // If we didn't find a label, check for div table case.
427 return InferLabelFromDivTable(element);
428 }
429
430 // Fills |option_strings| with the values of the <option> elements present in
431 // |select_element|.
GetOptionStringsFromElement(const WebSelectElement & select_element,std::vector<base::string16> * option_values,std::vector<base::string16> * option_contents)432 void GetOptionStringsFromElement(const WebSelectElement& select_element,
433 std::vector<base::string16>* option_values,
434 std::vector<base::string16>* option_contents) {
435 DCHECK(!select_element.isNull());
436
437 option_values->clear();
438 option_contents->clear();
439 WebVector<WebElement> list_items = select_element.listItems();
440
441 // Constrain the maximum list length to prevent a malicious site from DOS'ing
442 // the browser, without entirely breaking autocomplete for some extreme
443 // legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
444 if (list_items.size() > kMaxListSize)
445 return;
446
447 option_values->reserve(list_items.size());
448 option_contents->reserve(list_items.size());
449 for (size_t i = 0; i < list_items.size(); ++i) {
450 if (IsOptionElement(list_items[i])) {
451 const WebOptionElement option = list_items[i].toConst<WebOptionElement>();
452 option_values->push_back(option.value());
453 option_contents->push_back(option.text());
454 }
455 }
456 }
457
458 // The callback type used by |ForEachMatchingFormField()|.
459 typedef void (*Callback)(const FormFieldData&,
460 bool, /* is_initiating_element */
461 blink::WebFormControlElement*);
462
463 // For each autofillable field in |data| that matches a field in the |form|,
464 // the |callback| is invoked with the corresponding |form| field data.
ForEachMatchingFormField(const WebFormElement & form_element,const WebElement & initiating_element,const FormData & data,bool only_focusable_elements,bool force_override,Callback callback)465 void ForEachMatchingFormField(const WebFormElement& form_element,
466 const WebElement& initiating_element,
467 const FormData& data,
468 bool only_focusable_elements,
469 bool force_override,
470 Callback callback) {
471 std::vector<WebFormControlElement> control_elements;
472 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
473 &control_elements);
474
475 if (control_elements.size() != data.fields.size()) {
476 // This case should be reachable only for pathological websites and tests,
477 // which add or remove form fields while the user is interacting with the
478 // Autofill popup.
479 return;
480 }
481
482 // It's possible that the site has injected fields into the form after the
483 // page has loaded, so we can't assert that the size of the cached control
484 // elements is equal to the size of the fields in |form|. Fortunately, the
485 // one case in the wild where this happens, paypal.com signup form, the fields
486 // are appended to the end of the form and are not visible.
487 for (size_t i = 0; i < control_elements.size(); ++i) {
488 WebFormControlElement* element = &control_elements[i];
489
490 if (base::string16(element->nameForAutofill()) != data.fields[i].name) {
491 // This case should be reachable only for pathological websites, which
492 // rename form fields while the user is interacting with the Autofill
493 // popup. I (isherman) am not aware of any such websites, and so am
494 // optimistically including a NOTREACHED(). If you ever trip this check,
495 // please file a bug against me.
496 NOTREACHED();
497 continue;
498 }
499
500 bool is_initiating_element = (*element == initiating_element);
501
502 // Only autofill empty fields and the field that initiated the filling,
503 // i.e. the field the user is currently editing and interacting with.
504 const WebInputElement* input_element = toWebInputElement(element);
505 if (!force_override && !is_initiating_element &&
506 ((IsAutofillableInputElement(input_element) ||
507 IsTextAreaElement(*element)) &&
508 !element->value().isEmpty()))
509 continue;
510
511 if (!element->isEnabled() || element->isReadOnly() ||
512 (only_focusable_elements && !element->isFocusable()))
513 continue;
514
515 callback(data.fields[i], is_initiating_element, element);
516 }
517 }
518
519 // Sets the |field|'s value to the value in |data|.
520 // Also sets the "autofilled" attribute, causing the background to be yellow.
FillFormField(const FormFieldData & data,bool is_initiating_node,blink::WebFormControlElement * field)521 void FillFormField(const FormFieldData& data,
522 bool is_initiating_node,
523 blink::WebFormControlElement* field) {
524 // Nothing to fill.
525 if (data.value.empty())
526 return;
527
528 if (!data.is_autofilled)
529 return;
530
531 WebInputElement* input_element = toWebInputElement(field);
532 if (IsCheckableElement(input_element)) {
533 input_element->setChecked(data.is_checked, true);
534 } else {
535 base::string16 value = data.value;
536 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
537 // If the maxlength attribute contains a negative value, maxLength()
538 // returns the default maxlength value.
539 value = value.substr(0, input_element->maxLength());
540 }
541 field->setValue(value, true);
542 }
543
544 field->setAutofilled(true);
545
546 if (is_initiating_node &&
547 ((IsTextInput(input_element) || IsMonthInput(input_element)) ||
548 IsTextAreaElement(*field))) {
549 int length = field->value().length();
550 field->setSelectionRange(length, length);
551 // Clear the current IME composition (the underline), if there is one.
552 field->document().frame()->unmarkText();
553 }
554 }
555
556 // Sets the |field|'s "suggested" (non JS visible) value to the value in |data|.
557 // Also sets the "autofilled" attribute, causing the background to be yellow.
PreviewFormField(const FormFieldData & data,bool is_initiating_node,blink::WebFormControlElement * field)558 void PreviewFormField(const FormFieldData& data,
559 bool is_initiating_node,
560 blink::WebFormControlElement* field) {
561 // Nothing to preview.
562 if (data.value.empty())
563 return;
564
565 if (!data.is_autofilled)
566 return;
567
568 // Preview input, textarea and select fields. For input fields, excludes
569 // checkboxes and radio buttons, as there is no provision for
570 // setSuggestedCheckedValue in WebInputElement.
571 WebInputElement* input_element = toWebInputElement(field);
572 if (IsTextInput(input_element) || IsMonthInput(input_element)) {
573 // If the maxlength attribute contains a negative value, maxLength()
574 // returns the default maxlength value.
575 input_element->setSuggestedValue(
576 data.value.substr(0, input_element->maxLength()));
577 input_element->setAutofilled(true);
578 } else if (IsTextAreaElement(*field) || IsSelectElement(*field)) {
579 field->setSuggestedValue(data.value);
580 field->setAutofilled(true);
581 }
582
583 if (is_initiating_node &&
584 (IsTextInput(input_element) || IsTextAreaElement(*field))) {
585 // Select the part of the text that the user didn't type.
586 int start = field->value().length();
587 int end = field->suggestedValue().length();
588 field->setSelectionRange(start, end);
589 }
590 }
591
RetrievalMethodToString(const WebElementDescriptor::RetrievalMethod & method)592 std::string RetrievalMethodToString(
593 const WebElementDescriptor::RetrievalMethod& method) {
594 switch (method) {
595 case WebElementDescriptor::CSS_SELECTOR:
596 return "CSS_SELECTOR";
597 case WebElementDescriptor::ID:
598 return "ID";
599 case WebElementDescriptor::NONE:
600 return "NONE";
601 }
602 NOTREACHED();
603 return "UNKNOWN";
604 }
605
606 // Recursively checks whether |node| or any of its children have a non-empty
607 // bounding box. The recursion depth is bounded by |depth|.
IsWebNodeVisibleImpl(const blink::WebNode & node,const int depth)608 bool IsWebNodeVisibleImpl(const blink::WebNode& node, const int depth) {
609 if (depth < 0)
610 return false;
611 if (node.hasNonEmptyBoundingBox())
612 return true;
613
614 // The childNodes method is not a const method. Therefore it cannot be called
615 // on a const reference. Therefore we need a const cast.
616 const blink::WebNodeList& children =
617 const_cast<blink::WebNode&>(node).childNodes();
618 size_t length = children.length();
619 for (size_t i = 0; i < length; ++i) {
620 const blink::WebNode& item = children.item(i);
621 if (IsWebNodeVisibleImpl(item, depth - 1))
622 return true;
623 }
624 return false;
625 }
626
627 } // namespace
628
// NOTE(review): presumably the upper bound on the number of form fields that
// will be parsed; the constant's uses are outside this part of the file, so
// confirm against its declaration and callers.
const size_t kMaxParseableFields = 200;
630
IsMonthInput(const WebInputElement * element)631 bool IsMonthInput(const WebInputElement* element) {
632 CR_DEFINE_STATIC_LOCAL(WebString, kMonth, ("month"));
633 return element && !element->isNull() && element->formControlType() == kMonth;
634 }
635
636 // All text fields, including password fields, should be extracted.
IsTextInput(const WebInputElement * element)637 bool IsTextInput(const WebInputElement* element) {
638 return element && !element->isNull() && element->isTextField();
639 }
640
IsSelectElement(const WebFormControlElement & element)641 bool IsSelectElement(const WebFormControlElement& element) {
642 // Static for improved performance.
643 CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one"));
644 return !element.isNull() && element.formControlType() == kSelectOne;
645 }
646
IsTextAreaElement(const WebFormControlElement & element)647 bool IsTextAreaElement(const WebFormControlElement& element) {
648 // Static for improved performance.
649 CR_DEFINE_STATIC_LOCAL(WebString, kTextArea, ("textarea"));
650 return !element.isNull() && element.formControlType() == kTextArea;
651 }
652
IsCheckableElement(const WebInputElement * element)653 bool IsCheckableElement(const WebInputElement* element) {
654 if (!element || element->isNull())
655 return false;
656
657 return element->isCheckbox() || element->isRadioButton();
658 }
659
IsAutofillableInputElement(const WebInputElement * element)660 bool IsAutofillableInputElement(const WebInputElement* element) {
661 return IsTextInput(element) ||
662 IsMonthInput(element) ||
663 IsCheckableElement(element);
664 }
665
GetFormIdentifier(const WebFormElement & form)666 const base::string16 GetFormIdentifier(const WebFormElement& form) {
667 base::string16 identifier = form.name();
668 CR_DEFINE_STATIC_LOCAL(WebString, kId, ("id"));
669 if (identifier.empty())
670 identifier = form.getAttribute(kId);
671
672 return identifier;
673 }
674
IsWebNodeVisible(const blink::WebNode & node)675 bool IsWebNodeVisible(const blink::WebNode& node) {
676 // In the bug http://crbug.com/237216 the form's bounding box is empty
677 // however the form has non empty children. Thus we need to look at the
678 // form's children.
679 int kNodeSearchDepth = 2;
680 return IsWebNodeVisibleImpl(node, kNodeSearchDepth);
681 }
682
ClickElement(const WebDocument & document,const WebElementDescriptor & element_descriptor)683 bool ClickElement(const WebDocument& document,
684 const WebElementDescriptor& element_descriptor) {
685 WebString web_descriptor = WebString::fromUTF8(element_descriptor.descriptor);
686 blink::WebElement element;
687
688 switch (element_descriptor.retrieval_method) {
689 case WebElementDescriptor::CSS_SELECTOR: {
690 WebExceptionCode ec = 0;
691 element = document.querySelector(web_descriptor, ec);
692 if (ec)
693 DVLOG(1) << "Query selector failed. Error code: " << ec << ".";
694 break;
695 }
696 case WebElementDescriptor::ID:
697 element = document.getElementById(web_descriptor);
698 break;
699 case WebElementDescriptor::NONE:
700 return true;
701 }
702
703 if (element.isNull()) {
704 DVLOG(1) << "Could not find "
705 << element_descriptor.descriptor
706 << " by "
707 << RetrievalMethodToString(element_descriptor.retrieval_method)
708 << ".";
709 return false;
710 }
711
712 element.simulateClick();
713 return true;
714 }
715
716 // Fills |autofillable_elements| with all the auto-fillable form control
717 // elements in |form_element|.
ExtractAutofillableElements(const WebFormElement & form_element,RequirementsMask requirements,std::vector<WebFormControlElement> * autofillable_elements)718 void ExtractAutofillableElements(
719 const WebFormElement& form_element,
720 RequirementsMask requirements,
721 std::vector<WebFormControlElement>* autofillable_elements) {
722 WebVector<WebFormControlElement> control_elements;
723 form_element.getFormControlElements(control_elements);
724
725 autofillable_elements->clear();
726 for (size_t i = 0; i < control_elements.size(); ++i) {
727 WebFormControlElement element = control_elements[i];
728 if (!IsAutofillableElement(element))
729 continue;
730
731 if (requirements & REQUIRE_AUTOCOMPLETE) {
732 // TODO(isherman): WebKit currently doesn't handle the autocomplete
733 // attribute for select or textarea elements, but it probably should.
734 WebInputElement* input_element = toWebInputElement(&control_elements[i]);
735 if (IsAutofillableInputElement(input_element) &&
736 !SatisfiesRequireAutocomplete(*input_element))
737 continue;
738 }
739
740 autofillable_elements->push_back(element);
741 }
742 }
743
WebFormControlElementToFormField(const WebFormControlElement & element,ExtractMask extract_mask,FormFieldData * field)744 void WebFormControlElementToFormField(const WebFormControlElement& element,
745 ExtractMask extract_mask,
746 FormFieldData* field) {
747 DCHECK(field);
748 DCHECK(!element.isNull());
749 CR_DEFINE_STATIC_LOCAL(WebString, kAutocomplete, ("autocomplete"));
750
751 // The label is not officially part of a WebFormControlElement; however, the
752 // labels for all form control elements are scraped from the DOM and set in
753 // WebFormElementToFormData.
754 field->name = element.nameForAutofill();
755 field->form_control_type = base::UTF16ToUTF8(element.formControlType());
756 field->autocomplete_attribute =
757 base::UTF16ToUTF8(element.getAttribute(kAutocomplete));
758 if (field->autocomplete_attribute.size() > kMaxDataLength) {
759 // Discard overly long attribute values to avoid DOS-ing the browser
760 // process. However, send over a default string to indicate that the
761 // attribute was present.
762 field->autocomplete_attribute = "x-max-data-length-exceeded";
763 }
764
765 if (!IsAutofillableElement(element))
766 return;
767
768 const WebInputElement* input_element = toWebInputElement(&element);
769 if (IsAutofillableInputElement(input_element) ||
770 IsTextAreaElement(element)) {
771 field->is_autofilled = element.isAutofilled();
772 field->is_focusable = element.isFocusable();
773 field->should_autocomplete = element.autoComplete();
774 field->text_direction = element.directionForFormData() ==
775 "rtl" ? base::i18n::RIGHT_TO_LEFT : base::i18n::LEFT_TO_RIGHT;
776 }
777
778 if (IsAutofillableInputElement(input_element)) {
779 if (IsTextInput(input_element))
780 field->max_length = input_element->maxLength();
781
782 field->is_checkable = IsCheckableElement(input_element);
783 field->is_checked = input_element->isChecked();
784 } else if (IsTextAreaElement(element)) {
785 // Nothing more to do in this case.
786 } else if (extract_mask & EXTRACT_OPTIONS) {
787 // Set option strings on the field if available.
788 DCHECK(IsSelectElement(element));
789 const WebSelectElement select_element = element.toConst<WebSelectElement>();
790 GetOptionStringsFromElement(select_element,
791 &field->option_values,
792 &field->option_contents);
793 }
794
795 if (!(extract_mask & EXTRACT_VALUE))
796 return;
797
798 base::string16 value = element.value();
799
800 if (IsSelectElement(element) && (extract_mask & EXTRACT_OPTION_TEXT)) {
801 const WebSelectElement select_element = element.toConst<WebSelectElement>();
802 // Convert the |select_element| value to text if requested.
803 WebVector<WebElement> list_items = select_element.listItems();
804 for (size_t i = 0; i < list_items.size(); ++i) {
805 if (IsOptionElement(list_items[i])) {
806 const WebOptionElement option_element =
807 list_items[i].toConst<WebOptionElement>();
808 if (option_element.value() == value) {
809 value = option_element.text();
810 break;
811 }
812 }
813 }
814 }
815
816 // Constrain the maximum data length to prevent a malicious site from DOS'ing
817 // the browser: http://crbug.com/49332
818 if (value.size() > kMaxDataLength)
819 value = value.substr(0, kMaxDataLength);
820
821 field->value = value;
822 }
823
WebFormElementToFormData(const blink::WebFormElement & form_element,const blink::WebFormControlElement & form_control_element,RequirementsMask requirements,ExtractMask extract_mask,FormData * form,FormFieldData * field)824 bool WebFormElementToFormData(
825 const blink::WebFormElement& form_element,
826 const blink::WebFormControlElement& form_control_element,
827 RequirementsMask requirements,
828 ExtractMask extract_mask,
829 FormData* form,
830 FormFieldData* field) {
831 CR_DEFINE_STATIC_LOCAL(WebString, kLabel, ("label"));
832 CR_DEFINE_STATIC_LOCAL(WebString, kFor, ("for"));
833 CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden"));
834
835 const WebFrame* frame = form_element.document().frame();
836 if (!frame)
837 return false;
838
839 if (requirements & REQUIRE_AUTOCOMPLETE && !form_element.autoComplete())
840 return false;
841
842 form->name = GetFormIdentifier(form_element);
843 form->origin = frame->document().url();
844 form->action = frame->document().completeURL(form_element.action());
845 form->user_submitted = form_element.wasUserSubmitted();
846
847 // If the completed URL is not valid, just use the action we get from
848 // WebKit.
849 if (!form->action.is_valid())
850 form->action = GURL(form_element.action());
851
852 // A map from a FormFieldData's name to the FormFieldData itself.
853 std::map<base::string16, FormFieldData*> name_map;
854
855 // The extracted FormFields. We use pointers so we can store them in
856 // |name_map|.
857 ScopedVector<FormFieldData> form_fields;
858
859 WebVector<WebFormControlElement> control_elements;
860 form_element.getFormControlElements(control_elements);
861
862 // A vector of bools that indicate whether each field in the form meets the
863 // requirements and thus will be in the resulting |form|.
864 std::vector<bool> fields_extracted(control_elements.size(), false);
865
866 for (size_t i = 0; i < control_elements.size(); ++i) {
867 const WebFormControlElement& control_element = control_elements[i];
868
869 if (!IsAutofillableElement(control_element))
870 continue;
871
872 const WebInputElement* input_element = toWebInputElement(&control_element);
873 if (requirements & REQUIRE_AUTOCOMPLETE &&
874 IsAutofillableInputElement(input_element) &&
875 !SatisfiesRequireAutocomplete(*input_element))
876 continue;
877
878 // Create a new FormFieldData, fill it out and map it to the field's name.
879 FormFieldData* form_field = new FormFieldData;
880 WebFormControlElementToFormField(control_element, extract_mask, form_field);
881 form_fields.push_back(form_field);
882 // TODO(jhawkins): A label element is mapped to a form control element's id.
883 // field->name() will contain the id only if the name does not exist. Add
884 // an id() method to WebFormControlElement and use that here.
885 name_map[form_field->name] = form_field;
886 fields_extracted[i] = true;
887 }
888
889 // If we failed to extract any fields, give up. Also, to avoid overly
890 // expensive computation, we impose a maximum number of allowable fields.
891 if (form_fields.empty() || form_fields.size() > kMaxParseableFields)
892 return false;
893
894 // Loop through the label elements inside the form element. For each label
895 // element, get the corresponding form control element, use the form control
896 // element's name as a key into the <name, FormFieldData> map to find the
897 // previously created FormFieldData and set the FormFieldData's label to the
898 // label.firstChild().nodeValue() of the label element.
899 WebElementCollection labels = form_element.getElementsByHTMLTagName(kLabel);
900 DCHECK(!labels.isNull());
901 for (WebElement item = labels.firstItem(); !item.isNull();
902 item = labels.nextItem()) {
903 WebLabelElement label = item.to<WebLabelElement>();
904 WebFormControlElement field_element =
905 label.correspondingControl().to<WebFormControlElement>();
906
907 base::string16 element_name;
908 if (field_element.isNull()) {
909 // Sometimes site authors will incorrectly specify the corresponding
910 // field element's name rather than its id, so we compensate here.
911 element_name = label.getAttribute(kFor);
912 } else if (
913 !field_element.isFormControlElement() ||
914 field_element.formControlType() == kHidden) {
915 continue;
916 } else {
917 element_name = field_element.nameForAutofill();
918 }
919
920 std::map<base::string16, FormFieldData*>::iterator iter =
921 name_map.find(element_name);
922 if (iter != name_map.end()) {
923 base::string16 label_text = FindChildText(label);
924
925 // Concatenate labels because some sites might have multiple label
926 // candidates.
927 if (!iter->second->label.empty() && !label_text.empty())
928 iter->second->label += base::ASCIIToUTF16(" ");
929 iter->second->label += label_text;
930 }
931 }
932
933 // Loop through the form control elements, extracting the label text from
934 // the DOM. We use the |fields_extracted| vector to make sure we assign the
935 // extracted label to the correct field, as it's possible |form_fields| will
936 // not contain all of the elements in |control_elements|.
937 for (size_t i = 0, field_idx = 0;
938 i < control_elements.size() && field_idx < form_fields.size(); ++i) {
939 // This field didn't meet the requirements, so don't try to find a label
940 // for it.
941 if (!fields_extracted[i])
942 continue;
943
944 const WebFormControlElement& control_element = control_elements[i];
945 if (form_fields[field_idx]->label.empty())
946 form_fields[field_idx]->label = InferLabelForElement(control_element);
947
948 if (field && form_control_element == control_element)
949 *field = *form_fields[field_idx];
950
951 ++field_idx;
952 }
953
954 // Copy the created FormFields into the resulting FormData object.
955 for (ScopedVector<FormFieldData>::const_iterator iter = form_fields.begin();
956 iter != form_fields.end(); ++iter) {
957 form->fields.push_back(**iter);
958 }
959
960 return true;
961 }
962
FindFormAndFieldForFormControlElement(const WebFormControlElement & element,FormData * form,FormFieldData * field,RequirementsMask requirements)963 bool FindFormAndFieldForFormControlElement(const WebFormControlElement& element,
964 FormData* form,
965 FormFieldData* field,
966 RequirementsMask requirements) {
967 if (!IsAutofillableElement(element))
968 return false;
969
970 const WebFormElement form_element = element.form();
971 if (form_element.isNull())
972 return false;
973
974 ExtractMask extract_mask =
975 static_cast<ExtractMask>(EXTRACT_VALUE | EXTRACT_OPTIONS);
976 return WebFormElementToFormData(form_element,
977 element,
978 requirements,
979 extract_mask,
980 form,
981 field);
982 }
983
FillForm(const FormData & form,const WebFormControlElement & element)984 void FillForm(const FormData& form, const WebFormControlElement& element) {
985 WebFormElement form_element = element.form();
986 if (form_element.isNull())
987 return;
988
989 ForEachMatchingFormField(form_element,
990 element,
991 form,
992 true, /* only_focusable_elements */
993 false, /* don't force override */
994 &FillFormField);
995 }
996
FillFormIncludingNonFocusableElements(const FormData & form_data,const WebFormElement & form_element)997 void FillFormIncludingNonFocusableElements(const FormData& form_data,
998 const WebFormElement& form_element) {
999 if (form_element.isNull())
1000 return;
1001
1002 ForEachMatchingFormField(form_element,
1003 WebInputElement(),
1004 form_data,
1005 false, /* only_focusable_elements */
1006 true, /* force override */
1007 &FillFormField);
1008 }
1009
PreviewForm(const FormData & form,const WebFormControlElement & element)1010 void PreviewForm(const FormData& form, const WebFormControlElement& element) {
1011 WebFormElement form_element = element.form();
1012 if (form_element.isNull())
1013 return;
1014
1015 ForEachMatchingFormField(form_element,
1016 element,
1017 form,
1018 true, /* only_focusable_elements */
1019 false, /* dont force override */
1020 &PreviewFormField);
1021 }
1022
ClearPreviewedFormWithElement(const WebFormControlElement & element,bool was_autofilled)1023 bool ClearPreviewedFormWithElement(const WebFormControlElement& element,
1024 bool was_autofilled) {
1025 WebFormElement form_element = element.form();
1026 if (form_element.isNull())
1027 return false;
1028
1029 std::vector<WebFormControlElement> control_elements;
1030 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1031 &control_elements);
1032 for (size_t i = 0; i < control_elements.size(); ++i) {
1033 // There might be unrelated elements in this form which have already been
1034 // auto-filled. For example, the user might have already filled the address
1035 // part of a form and now be dealing with the credit card section. We only
1036 // want to reset the auto-filled status for fields that were previewed.
1037 WebFormControlElement control_element = control_elements[i];
1038
1039 // Only text input, textarea and select elements can be previewed.
1040 WebInputElement* input_element = toWebInputElement(&control_element);
1041 if (!IsTextInput(input_element) &&
1042 !IsMonthInput(input_element) &&
1043 !IsTextAreaElement(control_element) &&
1044 !IsSelectElement(control_element))
1045 continue;
1046
1047 // If the element is not auto-filled, we did not preview it,
1048 // so there is nothing to reset.
1049 if(!control_element.isAutofilled())
1050 continue;
1051
1052 if ((IsTextInput(input_element) ||
1053 IsMonthInput(input_element) ||
1054 IsTextAreaElement(control_element) ||
1055 IsSelectElement(control_element)) &&
1056 control_element.suggestedValue().isEmpty())
1057 continue;
1058
1059 // Clear the suggested value. For the initiating node, also restore the
1060 // original value.
1061 if (IsTextInput(input_element) || IsMonthInput(input_element) ||
1062 IsTextAreaElement(control_element)) {
1063 control_element.setSuggestedValue(WebString());
1064 bool is_initiating_node = (element == control_element);
1065 if (is_initiating_node) {
1066 control_element.setAutofilled(was_autofilled);
1067 // Clearing the suggested value in the focused node (above) can cause
1068 // selection to be lost. We force selection range to restore the text
1069 // cursor.
1070 int length = control_element.value().length();
1071 control_element.setSelectionRange(length, length);
1072 } else {
1073 control_element.setAutofilled(false);
1074 }
1075 } else if (IsSelectElement(control_element)) {
1076 control_element.setSuggestedValue(WebString());
1077 control_element.setAutofilled(false);
1078 }
1079 }
1080
1081 return true;
1082 }
1083
FormWithElementIsAutofilled(const WebInputElement & element)1084 bool FormWithElementIsAutofilled(const WebInputElement& element) {
1085 WebFormElement form_element = element.form();
1086 if (form_element.isNull())
1087 return false;
1088
1089 std::vector<WebFormControlElement> control_elements;
1090 ExtractAutofillableElements(form_element, REQUIRE_AUTOCOMPLETE,
1091 &control_elements);
1092 for (size_t i = 0; i < control_elements.size(); ++i) {
1093 WebInputElement* input_element = toWebInputElement(&control_elements[i]);
1094 if (!IsAutofillableInputElement(input_element))
1095 continue;
1096
1097 if (input_element->isAutofilled())
1098 return true;
1099 }
1100
1101 return false;
1102 }
1103
IsWebpageEmpty(const blink::WebFrame * frame)1104 bool IsWebpageEmpty(const blink::WebFrame* frame) {
1105 blink::WebDocument document = frame->document();
1106
1107 return IsWebElementEmpty(document.head()) &&
1108 IsWebElementEmpty(document.body());
1109 }
1110
IsWebElementEmpty(const blink::WebElement & element)1111 bool IsWebElementEmpty(const blink::WebElement& element) {
1112 // This array contains all tags which can be present in an empty page.
1113 const char* const kAllowedValue[] = {
1114 "script",
1115 "meta",
1116 "title",
1117 };
1118 const size_t kAllowedValueLength = arraysize(kAllowedValue);
1119
1120 if (element.isNull())
1121 return true;
1122 // The childNodes method is not a const method. Therefore it cannot be called
1123 // on a const reference. Therefore we need a const cast.
1124 const blink::WebNodeList& children =
1125 const_cast<blink::WebElement&>(element).childNodes();
1126 for (size_t i = 0; i < children.length(); ++i) {
1127 const blink::WebNode& item = children.item(i);
1128
1129 if (item.isTextNode() &&
1130 !base::ContainsOnlyChars(item.nodeValue().utf8(),
1131 base::kWhitespaceASCII))
1132 return false;
1133
1134 // We ignore all other items with names which begin with
1135 // the character # because they are not html tags.
1136 if (item.nodeName().utf8()[0] == '#')
1137 continue;
1138
1139 bool tag_is_allowed = false;
1140 // Test if the item name is in the kAllowedValue array
1141 for (size_t allowed_value_index = 0;
1142 allowed_value_index < kAllowedValueLength; ++allowed_value_index) {
1143 if (HasTagName(item,
1144 WebString::fromUTF8(kAllowedValue[allowed_value_index]))) {
1145 tag_is_allowed = true;
1146 break;
1147 }
1148 }
1149 if (!tag_is_allowed)
1150 return false;
1151 }
1152 return true;
1153 }
1154
GetScaledBoundingBox(float scale,WebFormControlElement * element)1155 gfx::RectF GetScaledBoundingBox(float scale, WebFormControlElement* element) {
1156 gfx::Rect bounding_box(element->boundsInViewportSpace());
1157 return gfx::RectF(bounding_box.x() * scale,
1158 bounding_box.y() * scale,
1159 bounding_box.width() * scale,
1160 bounding_box.height() * scale);
1161 }
1162
1163 } // namespace autofill
1164