• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/autofill/address_field.h"
6 
7 #include <stddef.h>
8 
9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h"
11 #include "base/string16.h"
12 #include "base/string_util.h"
13 #include "base/utf_string_conversions.h"
14 #include "chrome/browser/autofill/autofill_field.h"
15 #include "grit/autofill_resources.h"
16 #include "ui/base/l10n/l10n_util.h"
17 
GetFieldInfo(FieldTypeMap * field_type_map) const18 bool AddressField::GetFieldInfo(FieldTypeMap* field_type_map) const {
19   AutofillFieldType address_company;
20   AutofillFieldType address_line1;
21   AutofillFieldType address_line2;
22   AutofillFieldType address_city;
23   AutofillFieldType address_state;
24   AutofillFieldType address_zip;
25   AutofillFieldType address_country;
26 
27   switch (type_) {
28     case kShippingAddress:
29      // Fall through. Autofill does not support shipping addresses.
30     case kGenericAddress:
31       address_company = COMPANY_NAME;
32       address_line1 = ADDRESS_HOME_LINE1;
33       address_line2 = ADDRESS_HOME_LINE2;
34       address_city = ADDRESS_HOME_CITY;
35       address_state = ADDRESS_HOME_STATE;
36       address_zip = ADDRESS_HOME_ZIP;
37       address_country = ADDRESS_HOME_COUNTRY;
38       break;
39 
40     case kBillingAddress:
41       address_company = COMPANY_NAME;
42       address_line1 = ADDRESS_BILLING_LINE1;
43       address_line2 = ADDRESS_BILLING_LINE2;
44       address_city = ADDRESS_BILLING_CITY;
45       address_state = ADDRESS_BILLING_STATE;
46       address_zip = ADDRESS_BILLING_ZIP;
47       address_country = ADDRESS_BILLING_COUNTRY;
48       break;
49 
50     default:
51       NOTREACHED();
52       return false;
53   }
54 
55   bool ok;
56   ok = Add(field_type_map, company_, AutofillType(address_company));
57   DCHECK(ok);
58   ok = ok && Add(field_type_map, address1_, AutofillType(address_line1));
59   DCHECK(ok);
60   ok = ok && Add(field_type_map, address2_, AutofillType(address_line2));
61   DCHECK(ok);
62   ok = ok && Add(field_type_map, city_, AutofillType(address_city));
63   DCHECK(ok);
64   ok = ok && Add(field_type_map, state_, AutofillType(address_state));
65   DCHECK(ok);
66   ok = ok && Add(field_type_map, zip_, AutofillType(address_zip));
67   DCHECK(ok);
68   ok = ok && Add(field_type_map, country_, AutofillType(address_country));
69   DCHECK(ok);
70 
71   return ok;
72 }
73 
// Identifies this form field as an address field: always returns
// kAddressType.
FormFieldType AddressField::GetFormFieldType() const {
  return kAddressType;
}
77 
Parse(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml)78 AddressField* AddressField::Parse(
79     std::vector<AutofillField*>::const_iterator* iter,
80     bool is_ecml) {
81   DCHECK(iter);
82   if (!iter)
83     return NULL;
84 
85   scoped_ptr<AddressField> address_field(new AddressField);
86   std::vector<AutofillField*>::const_iterator q = *iter;
87   string16 pattern;
88 
89   // The ECML standard uses 2 letter country codes.  So we will
90   // have to remember that this is an ECML form, for when we fill
91   // it out.
92   address_field->is_ecml_ = is_ecml;
93 
94   string16 attention_ignored =
95       l10n_util::GetStringUTF16(IDS_AUTOFILL_ATTENTION_IGNORED_RE);
96   string16 region_ignored =
97       l10n_util::GetStringUTF16(IDS_AUTOFILL_REGION_IGNORED_RE);
98 
99   // Allow address fields to appear in any order.
100   while (true) {
101     if (ParseCompany(&q, is_ecml, address_field.get()) ||
102         ParseAddressLines(&q, is_ecml, address_field.get()) ||
103         ParseCity(&q, is_ecml, address_field.get()) ||
104         ParseState(&q, is_ecml, address_field.get()) ||
105         ParseZipCode(&q, is_ecml, address_field.get()) ||
106         ParseCountry(&q, is_ecml, address_field.get())) {
107       continue;
108     } else if (ParseText(&q, attention_ignored) ||
109                ParseText(&q, region_ignored)) {
110       // We ignore the following:
111       // * Attention.
112       // * Province/Region/Other.
113       continue;
114     } else if (*q != **iter && ParseEmpty(&q)) {
115       // Ignore non-labeled fields within an address; the page
116       // MapQuest Driving Directions North America.html contains such a field.
117       // We only ignore such fields after we've parsed at least one other field;
118       // otherwise we'd effectively parse address fields before other field
119       // types after any non-labeled fields, and we want email address fields to
120       // have precedence since some pages contain fields labeled
121       // "Email address".
122       continue;
123     } else {
124       // No field found.
125       break;
126     }
127   }
128 
129   // If we have identified any address fields in this field then it should be
130   // added to the list of fields.
131   if (address_field->company_ != NULL ||
132       address_field->address1_ != NULL || address_field->address2_ != NULL ||
133       address_field->city_ != NULL || address_field->state_ != NULL ||
134       address_field->zip_ != NULL || address_field->zip4_ ||
135       address_field->country_ != NULL) {
136     *iter = q;
137     return address_field.release();
138   }
139 
140   return NULL;
141 }
142 
FindType() const143 AddressType AddressField::FindType() const {
144   // This is not a full address, so don't even bother trying to figure
145   // out its type.
146   if (address1_ == NULL)
147     return kGenericAddress;
148 
149   // First look at the field name, which itself will sometimes contain
150   // "bill" or "ship".  We could check for the ECML type prefixes
151   // here, but there's no need to since ECML's prefixes Ecom_BillTo
152   // and Ecom_ShipTo contain "bill" and "ship" anyway.
153   string16 name = StringToLowerASCII(address1_->name);
154   return AddressTypeFromText(name);
155 }
156 
// Returns true when a first address line has been captured, i.e. |address1_|
// is non-NULL.
bool AddressField::IsFullAddress() {
  return address1_ != NULL;
}
160 
// Creates an AddressField with nothing captured yet: every field pointer
// starts out NULL, the address type defaults to kGenericAddress and the ECML
// flag is off.
AddressField::AddressField()
    : company_(NULL),
      address1_(NULL),
      address2_(NULL),
      city_(NULL),
      state_(NULL),
      zip_(NULL),
      zip4_(NULL),
      country_(NULL),
      type_(kGenericAddress),
      is_ecml_(false) {
}
173 
174 // static
ParseCompany(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)175 bool AddressField::ParseCompany(
176     std::vector<AutofillField*>::const_iterator* iter,
177     bool is_ecml, AddressField* address_field) {
178   if (address_field->company_ && !address_field->company_->IsEmpty())
179     return false;
180 
181   string16 pattern;
182   if (is_ecml)
183     pattern = GetEcmlPattern(kEcmlShipToCompanyName,
184                              kEcmlBillToCompanyName, '|');
185   else
186     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COMPANY_RE);
187 
188   if (!ParseText(iter, pattern, &address_field->company_))
189     return false;
190 
191   return true;
192 }
193 
194 // static
ParseAddressLines(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)195 bool AddressField::ParseAddressLines(
196     std::vector<AutofillField*>::const_iterator* iter,
197     bool is_ecml, AddressField* address_field) {
198   // We only match the string "address" in page text, not in element names,
199   // because sometimes every element in a group of address fields will have
200   // a name containing the string "address"; for example, on the page
201   // Kohl's - Register Billing Address.html the text element labeled "city"
202   // has the name "BILL_TO_ADDRESS<>city".  We do match address labels
203   // such as "address1", which appear as element names on various pages (eg
204   // AmericanGirl-Registration.html, BloomingdalesBilling.html,
205   // EBay Registration Enter Information.html).
206   if (address_field->address1_)
207     return false;
208 
209   string16 pattern;
210   if (is_ecml) {
211     pattern = GetEcmlPattern(kEcmlShipToAddress1, kEcmlBillToAddress1, '|');
212     if (!ParseText(iter, pattern, &address_field->address1_))
213       return false;
214   } else {
215     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_RE);
216     string16 label_pattern =
217         l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
218 
219     if (!ParseText(iter, pattern, &address_field->address1_))
220       if (!ParseLabelText(iter, label_pattern, &address_field->address1_))
221         return false;
222   }
223 
224   // Optionally parse more address lines, which may have empty labels.
225   // Some pages have 3 address lines (eg SharperImageModifyAccount.html)
226   // Some pages even have 4 address lines (e.g. uk/ShoesDirect2.html)!
227   if (is_ecml) {
228     pattern = GetEcmlPattern(kEcmlShipToAddress2, kEcmlBillToAddress2, '|');
229     if (!ParseEmptyText(iter, &address_field->address2_))
230       ParseText(iter, pattern, &address_field->address2_);
231   } else {
232     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_2_RE);
233     string16 label_pattern =
234         l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
235     if (!ParseEmptyText(iter, &address_field->address2_))
236       if (!ParseText(iter, pattern, &address_field->address2_))
237         ParseLabelText(iter, label_pattern, &address_field->address2_);
238   }
239 
240   // Try for a third line, which we will promptly discard.
241   if (address_field->address2_ != NULL) {
242     if (is_ecml) {
243       pattern = GetEcmlPattern(kEcmlShipToAddress3, kEcmlBillToAddress3, '|');
244       ParseText(iter, pattern);
245     } else {
246       pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_3_RE);
247       if (!ParseEmptyText(iter, NULL))
248         ParseText(iter, pattern, NULL);
249     }
250   }
251 
252   return true;
253 }
254 
255 // static
ParseCountry(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)256 bool AddressField::ParseCountry(
257     std::vector<AutofillField*>::const_iterator* iter,
258     bool is_ecml, AddressField* address_field) {
259   // Parse a country.  The occasional page (e.g.
260   // Travelocity_New Member Information1.html) calls this a "location".
261   // Note: ECML standard uses 2 letter country code (ISO 3166)
262   if (address_field->country_ && !address_field->country_->IsEmpty())
263     return false;
264 
265   string16 pattern;
266   if (is_ecml)
267     pattern = GetEcmlPattern(kEcmlShipToCountry, kEcmlBillToCountry, '|');
268   else
269     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE);
270 
271   if (!ParseText(iter, pattern, &address_field->country_))
272     return false;
273 
274   return true;
275 }
276 
277 // static
ParseZipCode(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)278 bool AddressField::ParseZipCode(
279     std::vector<AutofillField*>::const_iterator* iter,
280     bool is_ecml, AddressField* address_field) {
281   // Parse a zip code.  On some UK pages (e.g. The China Shop2.html) this
282   // is called a "post code".
283   //
284   // HACK: Just for the MapQuest driving directions page we match the
285   // exact name "1z", which MapQuest uses to label its zip code field.
286   // Hopefully before long we'll be smart enough to find the zip code
287   // on that page automatically.
288   if (address_field->zip_)
289     return false;
290 
291   // We may be out of fields.
292   if (!**iter)
293     return false;
294 
295   string16 pattern;
296   if (is_ecml) {
297     pattern = GetEcmlPattern(kEcmlShipToPostalCode, kEcmlBillToPostalCode, '|');
298   } else {
299     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_CODE_RE);
300   }
301 
302   AddressType tempType;
303   string16 name = (**iter)->name;
304 
305   // Note: comparisons using the ecml compliant name as a prefix must be used in
306   // order to accommodate Google Checkout. See FormFieldSet::GetEcmlPattern for
307   // more detail.
308   string16 bill_to_postal_code_field(ASCIIToUTF16(kEcmlBillToPostalCode));
309   if (StartsWith(name, bill_to_postal_code_field, false)) {
310     tempType = kBillingAddress;
311   } else if (StartsWith(name, bill_to_postal_code_field, false)) {
312     tempType = kShippingAddress;
313   } else {
314     tempType = kGenericAddress;
315   }
316 
317   if (!ParseText(iter, pattern, &address_field->zip_))
318     return false;
319 
320   address_field->type_ = tempType;
321   if (!is_ecml) {
322     // Look for a zip+4, whose field name will also often contain
323     // the substring "zip".
324     ParseText(iter,
325               l10n_util::GetStringUTF16(IDS_AUTOFILL_ZIP_4_RE),
326               &address_field->zip4_);
327   }
328 
329   return true;
330 }
331 
332 // static
ParseCity(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)333 bool AddressField::ParseCity(
334     std::vector<AutofillField*>::const_iterator* iter,
335     bool is_ecml, AddressField* address_field) {
336   // Parse a city name.  Some UK pages (e.g. The China Shop2.html) use
337   // the term "town".
338   if (address_field->city_)
339     return false;
340 
341   string16 pattern;
342   if (is_ecml)
343     pattern = GetEcmlPattern(kEcmlShipToCity, kEcmlBillToCity, '|');
344   else
345     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_CITY_RE);
346 
347   if (!ParseText(iter, pattern, &address_field->city_))
348     return false;
349 
350   return true;
351 }
352 
353 // static
ParseState(std::vector<AutofillField * >::const_iterator * iter,bool is_ecml,AddressField * address_field)354 bool AddressField::ParseState(
355     std::vector<AutofillField*>::const_iterator* iter,
356     bool is_ecml, AddressField* address_field) {
357   if (address_field->state_)
358     return false;
359 
360   string16 pattern;
361   if (is_ecml)
362     pattern = GetEcmlPattern(kEcmlShipToStateProv, kEcmlBillToStateProv, '|');
363   else
364     pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE);
365 
366   if (!ParseText(iter, pattern, &address_field->state_))
367     return false;
368 
369   return true;
370 }
371 
AddressTypeFromText(const string16 & text)372 AddressType AddressField::AddressTypeFromText(const string16 &text) {
373   if (text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_SAME_AS_RE))
374           != string16::npos ||
375       text.find(l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_TYPE_USE_MY_RE))
376           != string16::npos)
377     // This text could be a checkbox label such as "same as my billing
378     // address" or "use my shipping address".
379     // ++ It would help if we generally skipped all text that appears
380     // after a check box.
381     return kGenericAddress;
382 
383   // Not all pages say "billing address" and "shipping address" explicitly;
384   // for example, Craft Catalog1.html has "Bill-to Address" and
385   // "Ship-to Address".
386   size_t bill = text.rfind(
387       l10n_util::GetStringUTF16(IDS_AUTOFILL_BILLING_DESIGNATOR_RE));
388   size_t ship = text.rfind(
389       l10n_util::GetStringUTF16(IDS_AUTOFILL_SHIPPING_DESIGNATOR_RE));
390 
391   if (bill == string16::npos && ship == string16::npos)
392     return kGenericAddress;
393 
394   if (bill != string16::npos && ship == string16::npos)
395     return kBillingAddress;
396 
397   if (bill == string16::npos && ship != string16::npos)
398     return kShippingAddress;
399 
400   if (bill > ship)
401     return kBillingAddress;
402 
403   return kShippingAddress;
404 }
405