1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/autofill/address_field.h"
6
7 #include <stddef.h>
8
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/string16.h"
12 #include "base/string_util.h"
13 #include "base/utf_string_conversions.h"
14 #include "chrome/browser/autofill/autofill_field.h"
15 #include "grit/autofill_resources.h"
16 #include "ui/base/l10n/l10n_util.h"
17
GetFieldInfo(FieldTypeMap * field_type_map) const18 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const {
19 AutofillFieldType address_company;
20 AutofillFieldType address_line1;
21 AutofillFieldType address_line2;
22 AutofillFieldType address_city;
23 AutofillFieldType address_state;
24 AutofillFieldType address_zip;
25 AutofillFieldType address_country;
26
27 switch (type_) {
28 case kShippingAddress:
29 // Fall through. Autofill does not support shipping addresses.
30 case kGenericAddress:
31 address_company = COMPANY_NAME;
32 address_line1 = ADDRESS_HOME_LINE1;
33 address_line2 = ADDRESS_HOME_LINE2;
34 address_city = ADDRESS_HOME_CITY;
35 address_state = ADDRESS_HOME_STATE;
36 address_zip = ADDRESS_HOME_ZIP;
37 address_country = ADDRESS_HOME_COUNTRY;
38 break;
39
40 case kBillingAddress:
41 address_company = COMPANY_NAME;
42 address_line1 = ADDRESS_BILLING_LINE1;
43 address_line2 = ADDRESS_BILLING_LINE2;
44 address_city = ADDRESS_BILLING_CITY;
45 address_state = ADDRESS_BILLING_STATE;
46 address_zip = ADDRESS_BILLING_ZIP;
47 address_country = ADDRESS_BILLING_COUNTRY;
48 break;
49
50 default:
51 NOTREACHED();
52 return false;
53 }
54
55 bool ok;
56 ok = Add(field_type_map, company_, AutofillType(address_company));
57 DCHECK(ok);
58 ok = ok && Add(field_type_map, address1_, AutofillType(address_line1));
59 DCHECK(ok);
60 ok = ok && Add(field_type_map, address2_, AutofillType(address_line2));
61 DCHECK(ok);
62 ok = ok && Add(field_type_map, city_, AutofillType(address_city));
63 DCHECK(ok);
64 ok = ok && Add(field_type_map, state_, AutofillType(address_state));
65 DCHECK(ok);
66 ok = ok && Add(field_type_map, zip_, AutofillType(address_zip));
67 DCHECK(ok);
68 ok = ok && Add(field_type_map, country_, AutofillType(address_country));
69 DCHECK(ok);
70
71 return ok;
72 }
73
GetFormFieldType() const74 FormFieldType AddressField::GetFormFieldType() const {
75 return kAddressType;
76 }
77
Parse(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml)78 AddressField* AddressField::Parse(
79 std::vector<AutofillField*>::const_iterator* iter,
80 bool is_ecml) {
81 DCHECK(iter);
82 if (!iter)
83 return NULL;
84
85 scoped_ptr<AddressField> address_field(new AddressField);
86 std::vector<AutofillField*>::const_iterator q = *iter;
87 string16 pattern;
88
89 // The ECML standard uses 2 letter country codes. So we will
90 // have to remember that this is an ECML form, for when we fill
91 // it out.
92 address_field->is_ecml_ = is_ecml;
93
94 string16 attention_ignored =
95 l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE);
96 string16 region_ignored =
97 l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE);
98
99 // Allow address fields to appear in any order.
100 while (true) {
101 if (ParseCompany(&q, is_ecml, address_field.get()) ||
102 ParseAddressLines(&q, is_ecml, address_field.get()) ||
103 ParseCity(&q, is_ecml, address_field.get()) ||
104 ParseState(&q, is_ecml, address_field.get()) ||
105 ParseZipCode(&q, is_ecml, address_field.get()) ||
106 ParseCountry(&q, is_ecml, address_field.get())) {
107 continue;
108 } else if (ParseText(&q, attention_ignored) ||
109 ParseText(&q, region_ignored)) {
110 // We ignore the following:
111 // * Attention.
112 // * Province/Region/Other.
113 continue;
114 } else if (*q != **iter && ParseEmpty(&q)) {
115 // Ignore non-labeled fields within an address; the page
116 // MapQuest Driving Directions North America.html contains such a field.
117 // We only ignore such fields after we've parsed at least one other field;
118 // otherwise we'd effectively parse address fields before other field
119 // types after any non-labeled fields, and we want email address fields to
120 // have precedence since some pages contain fields labeled
121 // "Email address".
122 continue;
123 } else {
124 // No field found.
125 break;
126 }
127 }
128
129 // If we have identified any address fields in this field then it should be
130 // added to the list of fields.
131 if (address_field->company_ != NULL ||
132 address_field->address1_ != NULL || address_field->address2_ != NULL ||
133 address_field->city_ != NULL || address_field->state_ != NULL ||
134 address_field->zip_ != NULL || address_field->zip4_ ||
135 address_field->country_ != NULL) {
136 *iter = q;
137 return address_field.release();
138 }
139
140 return NULL;
141 }
142
FindType() const143 AddressType AddressField::FindType() const {
144 // This is not a full address, so don't even bother trying to figure
145 // out its type.
146 if (address1_ == NULL)
147 return kGenericAddress;
148
149 // First look at the field name, which itself will sometimes contain
150 // "bill" or "ship". We could check for the ECML type prefixes
151 // here, but there's no need to since ECML's prefixes Ecom_BillTo
152 // and Ecom_ShipTo contain "bill" and "ship" anyway.
153 string16 name = StringToLowerASCII(address1_->name);
154 return AddressTypeFromText(name);
155 }
156
IsFullAddress()157 bool AddressField::IsFullAddress() {
158 return address1_ != NULL;
159 }
160
AddressField()161 AddressField::AddressField()
162 : company_(NULL),
163 address1_(NULL),
164 address2_(NULL),
165 city_(NULL),
166 state_(NULL),
167 zip_(NULL),
168 zip4_(NULL),
169 country_(NULL),
170 type_(kGenericAddress),
171 is_ecml_(false) {
172 }
173
174 // static
ParseCompany(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)175 bool AddressField::ParseCompany(
176 std::vector<AutofillField*>::const_iterator* iter,
177 bool is_ecml, AddressField* address_field) {
178 if (address_field->company_ && !address_field->company_->IsEmpty())
179 return false;
180
181 string16 pattern;
182 if (is_ecml)
183 pattern = GetEcmlPattern(kEcmlShipToCompanyName,
184 kEcmlBillToCompanyName, '|');
185 else
186 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE);
187
188 if (!ParseText(iter, pattern, &address_field->company_))
189 return false;
190
191 return true;
192 }
193
194 // static
ParseAddressLines(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)195 bool AddressField::ParseAddressLines(
196 std::vector<AutofillField*>::const_iterator* iter,
197 bool is_ecml, AddressField* address_field) {
198 // We only match the string "address" in page text, not in element names,
199 // because sometimes every element in a group of address fields will have
200 // a name containing the string "address"; for example, on the page
201 // Kohl's - Register Billing Address.html the text element labeled "city"
202 // has the name "BILL_TO_ADDRESS<>city". We do match address labels
203 // such as "address1", which appear as element names on various pages (eg
204 // AmericanGirl-Registration.html, BloomingdalesBilling.html,
205 // EBay Registration Enter Information.html).
206 if (address_field->address1_)
207 return false;
208
209 string16 pattern;
210 if (is_ecml) {
211 pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|');
212 if (!ParseText(iter, pattern, &address_field->address1_))
213 return false;
214 } else {
215 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE);
216 string16 label_pattern =
217 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
218
219 if (!ParseText(iter, pattern, &address_field->address1_))
220 if (!ParseLabelText(iter, label_pattern, &address_field->address1_))
221 return false;
222 }
223
224 // Optionally parse more address lines, which may have empty labels.
225 // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
226 // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
227 if (is_ecml) {
228 pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|');
229 if (!ParseEmptyText(iter, &address_field->address2_))
230 ParseText(iter, pattern, &address_field->address2_);
231 } else {
232 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE);
233 string16 label_pattern =
234 l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
235 if (!ParseEmptyText(iter, &address_field->address2_))
236 if (!ParseText(iter, pattern, &address_field->address2_))
237 ParseLabelText(iter, label_pattern, &address_field->address2_);
238 }
239
240 // Try for a third line, which we will promptly discard.
241 if (address_field->address2_ != NULL) {
242 if (is_ecml) {
243 pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|');
244 ParseText(iter, pattern);
245 } else {
246 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE);
247 if (!ParseEmptyText(iter, NULL))
248 ParseText(iter, pattern, NULL);
249 }
250 }
251
252 return true;
253 }
254
255 // static
ParseCountry(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)256 bool AddressField::ParseCountry(
257 std::vector<AutofillField*>::const_iterator* iter,
258 bool is_ecml, AddressField* address_field) {
259 // Parse a country. The occasional page (e.g.
260 // Travelocity_New Member Information1.html) calls this a "location".
261 // Note: ECML standard uses 2 letter country code (ISO 3166)
262 if (address_field->country_ && !address_field->country_->IsEmpty())
263 return false;
264
265 string16 pattern;
266 if (is_ecml)
267 pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|');
268 else
269 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE);
270
271 if (!ParseText(iter, pattern, &address_field->country_))
272 return false;
273
274 return true;
275 }
276
277 // static
ParseZipCode(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)278 bool AddressField::ParseZipCode(
279 std::vector<AutofillField*>::const_iterator* iter,
280 bool is_ecml, AddressField* address_field) {
281 // Parse a zip code. On some UK pages (e.g. The China Shop2.html) this
282 // is called a "post code".
283 //
284 // HACK: Just for the MapQuest driving directions page we match the
285 // exact name "1z", which MapQuest uses to label its zip code field.
286 // Hopefully before long we'll be smart enough to find the zip code
287 // on that page automatically.
288 if (address_field->zip_)
289 return false;
290
291 // We may be out of fields.
292 if (!**iter)
293 return false;
294
295 string16 pattern;
296 if (is_ecml) {
297 pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|');
298 } else {
299 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE);
300 }
301
302 AddressType tempType;
303 string16 name = (**iter)->name;
304
305 // Note: comparisons using the ecml compliant name as a prefix must be used in
306 // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for
307 // more detail.
308 string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode));
309 if (StartsWith(name, bill_to_postal_code_field, false)) {
310 tempType = kBillingAddress;
311 } else if (StartsWith(name, bill_to_postal_code_field, false)) {
312 tempType = kShippingAddress;
313 } else {
314 tempType = kGenericAddress;
315 }
316
317 if (!ParseText(iter, pattern, &address_field->zip_))
318 return false;
319
320 address_field->type_ = tempType;
321 if (!is_ecml) {
322 // Look for a zip+4, whose field name will also often contain
323 // the substring "zip".
324 ParseText(iter,
325 l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE),
326 &address_field->zip4_);
327 }
328
329 return true;
330 }
331
332 // static
ParseCity(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)333 bool AddressField::ParseCity(
334 std::vector<AutofillField*>::const_iterator* iter,
335 bool is_ecml, AddressField* address_field) {
336 // Parse a city name. Some UK pages (e.g. The China Shop2.html) use
337 // the term "town".
338 if (address_field->city_)
339 return false;
340
341 string16 pattern;
342 if (is_ecml)
343 pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|');
344 else
345 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE);
346
347 if (!ParseText(iter, pattern, &address_field->city_))
348 return false;
349
350 return true;
351 }
352
353 // static
ParseState(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)354 bool AddressField::ParseState(
355 std::vector<AutofillField*>::const_iterator* iter,
356 bool is_ecml, AddressField* address_field) {
357 if (address_field->state_)
358 return false;
359
360 string16 pattern;
361 if (is_ecml)
362 pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|');
363 else
364 pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE);
365
366 if (!ParseText(iter, pattern, &address_field->state_))
367 return false;
368
369 return true;
370 }
371
AddressTypeFromText(const string16 & text)372 AddressType AddressField::AddressTypeFromText(const string16 &text) {
373 if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE))
374 != string16::npos ||
375 text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE))
376 != string16::npos)
377 // This text could be a checkbox label such as "same as my billing
378 // address" or "use my shipping address".
379 // ++ It would help if we generally skipped all text that appears
380 // after a check box.
381 return kGenericAddress;
382
383 // Not all pages say "billing address" and "shipping address" explicitly;
384 // for example, Craft Catalog1.html has "Bill-to Address" and
385 // "Ship-to Address".
386 size_t bill = text.rfind(
387 l10n_util::GetStringUTF16(IDS_AUTOFILL_BILLING_DESIGNATOR_RE));
388 size_t ship = text.rfind(
389 l10n_util::GetStringUTF16(IDS_AUTOFILL_SHIPPING_DESIGNATOR_RE));
390
391 if (bill == string16::npos && ship == string16::npos)
392 return kGenericAddress;
393
394 if (bill != string16::npos && ship == string16::npos)
395 return kBillingAddress;
396
397 if (bill == string16::npos && ship != string16::npos)
398 return kShippingAddress;
399
400 if (bill > ship)
401 return kBillingAddress;
402
403 return kShippingAddress;
404 }
405