1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/core/browser/name_field.h"
6
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/string_util.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "components/autofill/core/browser/autofill_regex_constants.h"
12 #include "components/autofill/core/browser/autofill_scanner.h"
13 #include "components/autofill/core/browser/autofill_type.h"
14 #include "ui/base/l10n/l10n_util.h"
15
16 using base::UTF8ToUTF16;
17
18 namespace autofill {
19 namespace {
20
21 // A form field that can parse a full name field.
22 class FullNameField : public NameField {
23 public:
24 static FullNameField* Parse(AutofillScanner* scanner);
25
26 protected:
27 // FormField:
28 virtual bool ClassifyField(ServerFieldTypeMap* map) const OVERRIDE;
29
30 private:
31 explicit FullNameField(AutofillField* field);
32
33 AutofillField* field_;
34
35 DISALLOW_COPY_AND_ASSIGN(FullNameField);
36 };
37
38 // A form field that can parse a first and last name field.
39 class FirstLastNameField : public NameField {
40 public:
41 static FirstLastNameField* ParseSpecificName(AutofillScanner* scanner);
42 static FirstLastNameField* ParseComponentNames(AutofillScanner* scanner);
43 static FirstLastNameField* Parse(AutofillScanner* scanner);
44
45 protected:
46 // FormField:
47 virtual bool ClassifyField(ServerFieldTypeMap* map) const OVERRIDE;
48
49 private:
50 FirstLastNameField();
51
52 AutofillField* first_name_;
53 AutofillField* middle_name_; // Optional.
54 AutofillField* last_name_;
55 bool middle_initial_; // True if middle_name_ is a middle initial.
56
57 DISALLOW_COPY_AND_ASSIGN(FirstLastNameField);
58 };
59
60 } // namespace
61
Parse(AutofillScanner * scanner)62 FormField* NameField::Parse(AutofillScanner* scanner) {
63 if (scanner->IsEnd())
64 return NULL;
65
66 // Try FirstLastNameField first since it's more specific.
67 NameField* field = FirstLastNameField::Parse(scanner);
68 if (!field)
69 field = FullNameField::Parse(scanner);
70 return field;
71 }
72
73 // This is overriden in concrete subclasses.
ClassifyField(ServerFieldTypeMap * map) const74 bool NameField::ClassifyField(ServerFieldTypeMap* map) const {
75 return false;
76 }
77
Parse(AutofillScanner * scanner)78 FullNameField* FullNameField::Parse(AutofillScanner* scanner) {
79 // Exclude e.g. "username" or "nickname" fields.
80 scanner->SaveCursor();
81 bool should_ignore = ParseField(scanner,
82 UTF8ToUTF16(autofill::kNameIgnoredRe), NULL);
83 scanner->Rewind();
84 if (should_ignore)
85 return NULL;
86
87 // Searching for any label containing the word "name" is too general;
88 // for example, Travelocity_Edit travel profile.html contains a field
89 // "Travel Profile Name".
90 AutofillField* field = NULL;
91 if (ParseField(scanner, UTF8ToUTF16(autofill::kNameRe), &field))
92 return new FullNameField(field);
93
94 return NULL;
95 }
96
ClassifyField(ServerFieldTypeMap * map) const97 bool FullNameField::ClassifyField(ServerFieldTypeMap* map) const {
98 return AddClassification(field_, NAME_FULL, map);
99 }
100
FullNameField(AutofillField * field)101 FullNameField::FullNameField(AutofillField* field) : field_(field) {
102 }
103
ParseSpecificName(AutofillScanner * scanner)104 FirstLastNameField* FirstLastNameField::ParseSpecificName(
105 AutofillScanner* scanner) {
106 // Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html)
107 // have the label "Name" followed by two or three text fields.
108 scoped_ptr<FirstLastNameField> v(new FirstLastNameField);
109 scanner->SaveCursor();
110
111 AutofillField* next = NULL;
112 if (ParseField(scanner,
113 UTF8ToUTF16(autofill::kNameSpecificRe), &v->first_name_) &&
114 ParseEmptyLabel(scanner, &next)) {
115 if (ParseEmptyLabel(scanner, &v->last_name_)) {
116 // There are three name fields; assume that the middle one is a
117 // middle initial (it is, at least, on SmithsonianCheckout.html).
118 v->middle_name_ = next;
119 v->middle_initial_ = true;
120 } else { // only two name fields
121 v->last_name_ = next;
122 }
123
124 return v.release();
125 }
126
127 scanner->Rewind();
128 return NULL;
129 }
130
ParseComponentNames(AutofillScanner * scanner)131 FirstLastNameField* FirstLastNameField::ParseComponentNames(
132 AutofillScanner* scanner) {
133 scoped_ptr<FirstLastNameField> v(new FirstLastNameField);
134 scanner->SaveCursor();
135
136 // A fair number of pages use the names "fname" and "lname" for naming
137 // first and last name fields (examples from the test suite:
138 // BESTBUY_COM - Sign In2.html; Crate and Barrel Check Out.html;
139 // dell_checkout1.html). At least one UK page (The China Shop2.html)
140 // asks, in stuffy English style, for just initials and a surname,
141 // so we match "initials" here (and just fill in a first name there,
142 // American-style).
143 // The ".*first$" matches fields ending in "first" (example in sample8.html).
144 // The ".*last$" matches fields ending in "last" (example in sample8.html).
145
146 // Allow name fields to appear in any order.
147 while (!scanner->IsEnd()) {
148 // Skip over any unrelated fields, e.g. "username" or "nickname".
149 if (ParseFieldSpecifics(scanner, UTF8ToUTF16(autofill::kNameIgnoredRe),
150 MATCH_DEFAULT | MATCH_SELECT, NULL)) {
151 continue;
152 }
153
154 if (!v->first_name_ &&
155 ParseField(scanner, UTF8ToUTF16(autofill::kFirstNameRe),
156 &v->first_name_)) {
157 continue;
158 }
159
160 // We check for a middle initial before checking for a middle name
161 // because at least one page (PC Connection.html) has a field marked
162 // as both (the label text is "MI" and the element name is
163 // "txtmiddlename"); such a field probably actually represents a
164 // middle initial.
165 if (!v->middle_name_ &&
166 ParseField(scanner, UTF8ToUTF16(autofill::kMiddleInitialRe),
167 &v->middle_name_)) {
168 v->middle_initial_ = true;
169 continue;
170 }
171
172 if (!v->middle_name_ &&
173 ParseField(scanner, UTF8ToUTF16(autofill::kMiddleNameRe),
174 &v->middle_name_)) {
175 continue;
176 }
177
178 if (!v->last_name_ &&
179 ParseField(scanner, UTF8ToUTF16(autofill::kLastNameRe),
180 &v->last_name_)) {
181 continue;
182 }
183
184 break;
185 }
186
187 // Consider the match to be successful if we detected both first and last name
188 // fields.
189 if (v->first_name_ && v->last_name_)
190 return v.release();
191
192 scanner->Rewind();
193 return NULL;
194 }
195
Parse(AutofillScanner * scanner)196 FirstLastNameField* FirstLastNameField::Parse(AutofillScanner* scanner) {
197 FirstLastNameField* field = ParseSpecificName(scanner);
198 if (!field)
199 field = ParseComponentNames(scanner);
200 return field;
201 }
202
FirstLastNameField()203 FirstLastNameField::FirstLastNameField()
204 : first_name_(NULL),
205 middle_name_(NULL),
206 last_name_(NULL),
207 middle_initial_(false) {
208 }
209
ClassifyField(ServerFieldTypeMap * map) const210 bool FirstLastNameField::ClassifyField(ServerFieldTypeMap* map) const {
211 bool ok = AddClassification(first_name_, NAME_FIRST, map);
212 ok = ok && AddClassification(last_name_, NAME_LAST, map);
213 ServerFieldType type = middle_initial_ ? NAME_MIDDLE_INITIAL : NAME_MIDDLE;
214 ok = ok && AddClassification(middle_name_, type, map);
215 return ok;
216 }
217
218 } // namespace autofill
219