1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autofill/core/browser/phone_field.h"
6
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/string16.h"
10 #include "base/strings/string_util.h"
11 #include "base/strings/utf_string_conversions.h"
12 #include "components/autofill/core/browser/autofill_field.h"
13 #include "components/autofill/core/browser/autofill_regex_constants.h"
14 #include "components/autofill/core/browser/autofill_scanner.h"
15 #include "ui/base/l10n/l10n_util.h"
16
17 namespace autofill {
18 namespace {
19
20 // This string includes all area code separators, including NoText.
GetAreaRegex()21 base::string16 GetAreaRegex() {
22 base::string16 area_code = base::UTF8ToUTF16(autofill::kAreaCodeRe);
23 area_code.append(base::ASCIIToUTF16("|")); // Regexp separator.
24 area_code.append(base::UTF8ToUTF16(autofill::kAreaCodeNotextRe));
25 return area_code;
26 }
27
28 } // namespace
29
~PhoneField()30 PhoneField::~PhoneField() {}
31
32 // Phone field grammars - first matched grammar will be parsed. Grammars are
33 // separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
34 // parsed separately unless they are necessary parts of the match.
35 // The following notation is used to describe the patterns:
36 // <cc> - country code field.
37 // <ac> - area code field.
38 // <phone> - phone or prefix.
39 // <suffix> - suffix.
40 // <ext> - extension.
41 // :N means field is limited to N characters, otherwise it is unlimited.
42 // (pattern <field>)? means pattern is optional and matched separately.
43 const PhoneField::Parser PhoneField::kPhoneFieldGrammars[] = {
44 // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
45 // (Ext: <ext>)?)?
46 { REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0 },
47 { REGEX_AREA, FIELD_AREA_CODE, 0 },
48 { REGEX_PHONE, FIELD_PHONE, 0 },
49 { REGEX_SEPARATOR, FIELD_NONE, 0 },
50 // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
51 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3 },
52 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
53 { REGEX_PHONE, FIELD_SUFFIX, 4 },
54 { REGEX_SEPARATOR, FIELD_NONE, 0 },
55 // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
56 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
57 { REGEX_PHONE, FIELD_AREA_CODE, 3 },
58 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
59 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
60 { REGEX_SEPARATOR, FIELD_NONE, 0 },
61 // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
62 { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
63 { REGEX_PHONE, FIELD_AREA_CODE, 3 },
64 { REGEX_PHONE, FIELD_PHONE, 3 },
65 { REGEX_PHONE, FIELD_SUFFIX, 4 },
66 { REGEX_SEPARATOR, FIELD_NONE, 0 },
67 // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
68 { REGEX_AREA, FIELD_AREA_CODE, 0 },
69 { REGEX_PHONE, FIELD_PHONE, 0 },
70 { REGEX_SEPARATOR, FIELD_NONE, 0 },
71 // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
72 { REGEX_PHONE, FIELD_AREA_CODE, 0 },
73 { REGEX_PHONE, FIELD_PHONE, 3 },
74 { REGEX_PHONE, FIELD_SUFFIX, 4 },
75 { REGEX_SEPARATOR, FIELD_NONE, 0 },
76 // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
77 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
78 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
79 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
80 { REGEX_SEPARATOR, FIELD_NONE, 0 },
81 // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
82 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
83 { REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0 },
84 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
85 { REGEX_SEPARATOR, FIELD_NONE, 0 },
86 // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
87 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
88 { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
89 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0 },
90 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0 },
91 { REGEX_SEPARATOR, FIELD_NONE, 0 },
92 // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
93 { REGEX_PHONE, FIELD_AREA_CODE, 0 },
94 { REGEX_PREFIX, FIELD_PHONE, 0 },
95 { REGEX_SUFFIX, FIELD_SUFFIX, 0 },
96 { REGEX_SEPARATOR, FIELD_NONE, 0 },
97 // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
98 { REGEX_PHONE, FIELD_AREA_CODE, 0 },
99 { REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3 },
100 { REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4 },
101 { REGEX_SEPARATOR, FIELD_NONE, 0 },
102 // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
103 { REGEX_PHONE, FIELD_COUNTRY_CODE, 0 },
104 { REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0 },
105 { REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0 },
106 { REGEX_SEPARATOR, FIELD_NONE, 0 },
107 // Phone: <ac> - <phone> (Ext: <ext>)?
108 { REGEX_AREA, FIELD_AREA_CODE, 0 },
109 { REGEX_PHONE, FIELD_PHONE, 0 },
110 { REGEX_SEPARATOR, FIELD_NONE, 0 },
111 // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
112 { REGEX_PHONE, FIELD_COUNTRY_CODE, 3 },
113 { REGEX_PHONE, FIELD_PHONE, 10 },
114 { REGEX_SEPARATOR, FIELD_NONE, 0 },
115 // Phone: <phone> (Ext: <ext>)?
116 { REGEX_PHONE, FIELD_PHONE, 0 },
117 { REGEX_SEPARATOR, FIELD_NONE, 0 },
118 };
119
120 // static
Parse(AutofillScanner * scanner)121 FormField* PhoneField::Parse(AutofillScanner* scanner) {
122 if (scanner->IsEnd())
123 return NULL;
124
125 scanner->SaveCursor();
126
127 // The form owns the following variables, so they should not be deleted.
128 AutofillField* parsed_fields[FIELD_MAX];
129
130 for (size_t i = 0; i < arraysize(kPhoneFieldGrammars); ++i) {
131 memset(parsed_fields, 0, sizeof(parsed_fields));
132 scanner->SaveCursor();
133
134 // Attempt to parse according to the next grammar.
135 for (; i < arraysize(kPhoneFieldGrammars) &&
136 kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR; ++i) {
137 if (!ParseFieldSpecifics(
138 scanner,
139 GetRegExp(kPhoneFieldGrammars[i].regex),
140 MATCH_DEFAULT | MATCH_TELEPHONE,
141 &parsed_fields[kPhoneFieldGrammars[i].phone_part]))
142 break;
143 if (kPhoneFieldGrammars[i].max_size &&
144 (!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length ||
145 kPhoneFieldGrammars[i].max_size <
146 parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length)) {
147 break;
148 }
149 }
150
151 if (i >= arraysize(kPhoneFieldGrammars)) {
152 scanner->Rewind();
153 return NULL; // Parsing failed.
154 }
155 if (kPhoneFieldGrammars[i].regex == REGEX_SEPARATOR)
156 break; // Parsing succeeded.
157
158 // Proceed to the next grammar.
159 do {
160 ++i;
161 } while (i < arraysize(kPhoneFieldGrammars) &&
162 kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR);
163
164 if (i + 1 == arraysize(kPhoneFieldGrammars)) {
165 scanner->Rewind();
166 return NULL; // Tried through all the possibilities - did not match.
167 }
168
169 scanner->Rewind();
170 }
171
172 if (!parsed_fields[FIELD_PHONE]) {
173 scanner->Rewind();
174 return NULL;
175 }
176
177 scoped_ptr<PhoneField> phone_field(new PhoneField);
178 for (int i = 0; i < FIELD_MAX; ++i)
179 phone_field->parsed_phone_fields_[i] = parsed_fields[i];
180
181 // Look for optional fields.
182
183 // Look for a third text box.
184 if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
185 if (!ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneSuffixRe),
186 &phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
187 ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe),
188 &phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
189 }
190 }
191
192 // Now look for an extension.
193 ParseField(scanner, base::UTF8ToUTF16(autofill::kPhoneExtensionRe),
194 &phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
195
196 return phone_field.release();
197 }
198
ClassifyField(ServerFieldTypeMap * map) const199 bool PhoneField::ClassifyField(ServerFieldTypeMap* map) const {
200 bool ok = true;
201
202 DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed.
203
204 if ((parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) ||
205 (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) ||
206 (parsed_phone_fields_[FIELD_SUFFIX] != NULL)) {
207 if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
208 ok = ok && AddClassification(parsed_phone_fields_[FIELD_COUNTRY_CODE],
209 PHONE_HOME_COUNTRY_CODE,
210 map);
211 }
212
213 ServerFieldType field_number_type = PHONE_HOME_NUMBER;
214 if (parsed_phone_fields_[FIELD_AREA_CODE] != NULL) {
215 ok = ok && AddClassification(parsed_phone_fields_[FIELD_AREA_CODE],
216 PHONE_HOME_CITY_CODE,
217 map);
218 } else if (parsed_phone_fields_[FIELD_COUNTRY_CODE] != NULL) {
219 // Only if we can find country code without city code, it means the phone
220 // number include city code.
221 field_number_type = PHONE_HOME_CITY_AND_NUMBER;
222 }
223 // We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
224 // we fill only the prefix depending on the size of the input field.
225 ok = ok && AddClassification(parsed_phone_fields_[FIELD_PHONE],
226 field_number_type,
227 map);
228 // We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
229 // we fill only the suffix depending on the size of the input field.
230 if (parsed_phone_fields_[FIELD_SUFFIX] != NULL) {
231 ok = ok && AddClassification(parsed_phone_fields_[FIELD_SUFFIX],
232 PHONE_HOME_NUMBER,
233 map);
234 }
235 } else {
236 ok = AddClassification(parsed_phone_fields_[FIELD_PHONE],
237 PHONE_HOME_WHOLE_NUMBER,
238 map);
239 }
240
241 return ok;
242 }
243
PhoneField()244 PhoneField::PhoneField() {
245 memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
246 }
247
248 // static
GetRegExp(RegexType regex_id)249 base::string16 PhoneField::GetRegExp(RegexType regex_id) {
250 switch (regex_id) {
251 case REGEX_COUNTRY:
252 return base::UTF8ToUTF16(autofill::kCountryCodeRe);
253 case REGEX_AREA:
254 return GetAreaRegex();
255 case REGEX_AREA_NOTEXT:
256 return base::UTF8ToUTF16(autofill::kAreaCodeNotextRe);
257 case REGEX_PHONE:
258 return base::UTF8ToUTF16(autofill::kPhoneRe);
259 case REGEX_PREFIX_SEPARATOR:
260 return base::UTF8ToUTF16(autofill::kPhonePrefixSeparatorRe);
261 case REGEX_PREFIX:
262 return base::UTF8ToUTF16(autofill::kPhonePrefixRe);
263 case REGEX_SUFFIX_SEPARATOR:
264 return base::UTF8ToUTF16(autofill::kPhoneSuffixSeparatorRe);
265 case REGEX_SUFFIX:
266 return base::UTF8ToUTF16(autofill::kPhoneSuffixRe);
267 case REGEX_EXTENSION:
268 return base::UTF8ToUTF16(autofill::kPhoneExtensionRe);
269 default:
270 NOTREACHED();
271 break;
272 }
273 return base::string16();
274 }
275
276 } // namespace autofill
277