// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #include "components/omnibox/search_suggestion_parser.h"
6
7 #include "base/i18n/icu_string_conversions.h"
8 #include "base/json/json_string_value_serializer.h"
9 #include "base/json/json_writer.h"
10 #include "base/logging.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/values.h"
14 #include "components/omnibox/autocomplete_input.h"
15 #include "components/omnibox/url_prefix.h"
16 #include "components/url_fixer/url_fixer.h"
17 #include "net/base/net_util.h"
18 #include "net/http/http_response_headers.h"
19 #include "net/url_request/url_fetcher.h"
20 #include "url/url_constants.h"
21
22 namespace {
23
GetAutocompleteMatchType(const std::string & type)24 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
25 if (type == "ENTITY")
26 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY;
27 if (type == "INFINITE")
28 return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE;
29 if (type == "PERSONALIZED_QUERY")
30 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED;
31 if (type == "PROFILE")
32 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE;
33 if (type == "NAVIGATION")
34 return AutocompleteMatchType::NAVSUGGEST;
35 if (type == "PERSONALIZED_NAVIGATION")
36 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED;
37 return AutocompleteMatchType::SEARCH_SUGGEST;
38 }
39
40 } // namespace
41
// SearchSuggestionParser::Result ----------------------------------------------
43
Result(bool from_keyword_provider,int relevance,bool relevance_from_server,AutocompleteMatchType::Type type,const std::string & deletion_url)44 SearchSuggestionParser::Result::Result(bool from_keyword_provider,
45 int relevance,
46 bool relevance_from_server,
47 AutocompleteMatchType::Type type,
48 const std::string& deletion_url)
49 : from_keyword_provider_(from_keyword_provider),
50 type_(type),
51 relevance_(relevance),
52 relevance_from_server_(relevance_from_server),
53 received_after_last_keystroke_(true),
54 deletion_url_(deletion_url) {}
55
~Result()56 SearchSuggestionParser::Result::~Result() {}
57
// SearchSuggestionParser::SuggestResult ---------------------------------------
59
SuggestResult(const base::string16 & suggestion,AutocompleteMatchType::Type type,const base::string16 & match_contents,const base::string16 & match_contents_prefix,const base::string16 & annotation,const base::string16 & answer_contents,const base::string16 & answer_type,const std::string & suggest_query_params,const std::string & deletion_url,bool from_keyword_provider,int relevance,bool relevance_from_server,bool should_prefetch,const base::string16 & input_text)60 SearchSuggestionParser::SuggestResult::SuggestResult(
61 const base::string16& suggestion,
62 AutocompleteMatchType::Type type,
63 const base::string16& match_contents,
64 const base::string16& match_contents_prefix,
65 const base::string16& annotation,
66 const base::string16& answer_contents,
67 const base::string16& answer_type,
68 const std::string& suggest_query_params,
69 const std::string& deletion_url,
70 bool from_keyword_provider,
71 int relevance,
72 bool relevance_from_server,
73 bool should_prefetch,
74 const base::string16& input_text)
75 : Result(from_keyword_provider,
76 relevance,
77 relevance_from_server,
78 type,
79 deletion_url),
80 suggestion_(suggestion),
81 match_contents_prefix_(match_contents_prefix),
82 annotation_(annotation),
83 suggest_query_params_(suggest_query_params),
84 answer_contents_(answer_contents),
85 answer_type_(answer_type),
86 should_prefetch_(should_prefetch) {
87 match_contents_ = match_contents;
88 DCHECK(!match_contents_.empty());
89 ClassifyMatchContents(true, input_text);
90 }
91
~SuggestResult()92 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
93
ClassifyMatchContents(const bool allow_bolding_all,const base::string16 & input_text)94 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
95 const bool allow_bolding_all,
96 const base::string16& input_text) {
97 if (input_text.empty()) {
98 // In case of zero-suggest results, do not highlight matches.
99 match_contents_class_.push_back(
100 ACMatchClassification(0, ACMatchClassification::NONE));
101 return;
102 }
103
104 base::string16 lookup_text = input_text;
105 if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) {
106 const size_t contents_index =
107 suggestion_.length() - match_contents_.length();
108 // Ensure the query starts with the input text, and ends with the match
109 // contents, and the input text has an overlap with contents.
110 if (StartsWith(suggestion_, input_text, true) &&
111 EndsWith(suggestion_, match_contents_, true) &&
112 (input_text.length() > contents_index)) {
113 lookup_text = input_text.substr(contents_index);
114 }
115 }
116 size_t lookup_position = match_contents_.find(lookup_text);
117 if (!allow_bolding_all && (lookup_position == base::string16::npos)) {
118 // Bail if the code below to update the bolding would bold the whole
119 // string. Note that the string may already be entirely bolded; if
120 // so, leave it as is.
121 return;
122 }
123 match_contents_class_.clear();
124 // We do intra-string highlighting for suggestions - the suggested segment
125 // will be highlighted, e.g. for input_text = "you" the suggestion may be
126 // "youtube", so we'll bold the "tube" section: you*tube*.
127 if (input_text != match_contents_) {
128 if (lookup_position == base::string16::npos) {
129 // The input text is not a substring of the query string, e.g. input
130 // text is "slasdot" and the query string is "slashdot", so we bold the
131 // whole thing.
132 match_contents_class_.push_back(
133 ACMatchClassification(0, ACMatchClassification::MATCH));
134 } else {
135 // We don't iterate over the string here annotating all matches because
136 // it looks odd to have every occurrence of a substring that may be as
137 // short as a single character highlighted in a query suggestion result,
138 // e.g. for input text "s" and query string "southwest airlines", it
139 // looks odd if both the first and last s are highlighted.
140 if (lookup_position != 0) {
141 match_contents_class_.push_back(
142 ACMatchClassification(0, ACMatchClassification::MATCH));
143 }
144 match_contents_class_.push_back(
145 ACMatchClassification(lookup_position, ACMatchClassification::NONE));
146 size_t next_fragment_position = lookup_position + lookup_text.length();
147 if (next_fragment_position < match_contents_.length()) {
148 match_contents_class_.push_back(ACMatchClassification(
149 next_fragment_position, ACMatchClassification::MATCH));
150 }
151 }
152 } else {
153 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
154 // for the default provider or a keyword search provider.
155 match_contents_class_.push_back(
156 ACMatchClassification(0, ACMatchClassification::NONE));
157 }
158 }
159
CalculateRelevance(const AutocompleteInput & input,bool keyword_provider_requested) const160 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
161 const AutocompleteInput& input,
162 bool keyword_provider_requested) const {
163 if (!from_keyword_provider_ && keyword_provider_requested)
164 return 100;
165 return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600);
166 }
167
// SearchSuggestionParser::NavigationResult ------------------------------------
169
NavigationResult(const AutocompleteSchemeClassifier & scheme_classifier,const GURL & url,AutocompleteMatchType::Type type,const base::string16 & description,const std::string & deletion_url,bool from_keyword_provider,int relevance,bool relevance_from_server,const base::string16 & input_text,const std::string & languages)170 SearchSuggestionParser::NavigationResult::NavigationResult(
171 const AutocompleteSchemeClassifier& scheme_classifier,
172 const GURL& url,
173 AutocompleteMatchType::Type type,
174 const base::string16& description,
175 const std::string& deletion_url,
176 bool from_keyword_provider,
177 int relevance,
178 bool relevance_from_server,
179 const base::string16& input_text,
180 const std::string& languages)
181 : Result(from_keyword_provider, relevance, relevance_from_server, type,
182 deletion_url),
183 url_(url),
184 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
185 url, net::FormatUrl(url, languages,
186 net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP,
187 net::UnescapeRule::SPACES, NULL, NULL, NULL),
188 scheme_classifier)),
189 description_(description) {
190 DCHECK(url_.is_valid());
191 CalculateAndClassifyMatchContents(true, input_text, languages);
192 }
193
~NavigationResult()194 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
195
196 void
CalculateAndClassifyMatchContents(const bool allow_bolding_nothing,const base::string16 & input_text,const std::string & languages)197 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
198 const bool allow_bolding_nothing,
199 const base::string16& input_text,
200 const std::string& languages) {
201 if (input_text.empty()) {
202 // In case of zero-suggest results, do not highlight matches.
203 match_contents_class_.push_back(
204 ACMatchClassification(0, ACMatchClassification::NONE));
205 return;
206 }
207
208 // First look for the user's input inside the formatted url as it would be
209 // without trimming the scheme, so we can find matches at the beginning of the
210 // scheme.
211 const URLPrefix* prefix =
212 URLPrefix::BestURLPrefix(formatted_url_, input_text);
213 size_t match_start = (prefix == NULL) ?
214 formatted_url_.find(input_text) : prefix->prefix.length();
215 bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) &&
216 (!prefix || (match_start != 0));
217 const net::FormatUrlTypes format_types =
218 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP);
219
220 base::string16 match_contents = net::FormatUrl(url_, languages, format_types,
221 net::UnescapeRule::SPACES, NULL, NULL, &match_start);
222 // If the first match in the untrimmed string was inside a scheme that we
223 // trimmed, look for a subsequent match.
224 if (match_start == base::string16::npos)
225 match_start = match_contents.find(input_text);
226 // Update |match_contents_| and |match_contents_class_| if it's allowed.
227 if (allow_bolding_nothing || (match_start != base::string16::npos)) {
228 match_contents_ = match_contents;
229 // Safe if |match_start| is npos; also safe if the input is longer than the
230 // remaining contents after |match_start|.
231 AutocompleteMatch::ClassifyLocationInString(match_start,
232 input_text.length(), match_contents_.length(),
233 ACMatchClassification::URL, &match_contents_class_);
234 }
235 }
236
CalculateRelevance(const AutocompleteInput & input,bool keyword_provider_requested) const237 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
238 const AutocompleteInput& input,
239 bool keyword_provider_requested) const {
240 return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150;
241 }
242
// SearchSuggestionParser::Results ---------------------------------------------
244
Results()245 SearchSuggestionParser::Results::Results()
246 : verbatim_relevance(-1),
247 field_trial_triggered(false),
248 relevances_from_server(false) {}
249
~Results()250 SearchSuggestionParser::Results::~Results() {}
251
Clear()252 void SearchSuggestionParser::Results::Clear() {
253 suggest_results.clear();
254 navigation_results.clear();
255 verbatim_relevance = -1;
256 metadata.clear();
257 }
258
HasServerProvidedScores() const259 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
260 if (verbatim_relevance >= 0)
261 return true;
262
263 // Right now either all results of one type will be server-scored or they will
264 // all be locally scored, but in case we change this later, we'll just check
265 // them all.
266 for (SuggestResults::const_iterator i(suggest_results.begin());
267 i != suggest_results.end(); ++i) {
268 if (i->relevance_from_server())
269 return true;
270 }
271 for (NavigationResults::const_iterator i(navigation_results.begin());
272 i != navigation_results.end(); ++i) {
273 if (i->relevance_from_server())
274 return true;
275 }
276
277 return false;
278 }
279
// SearchSuggestionParser ------------------------------------------------------
281
282 // static
ExtractJsonData(const net::URLFetcher * source)283 std::string SearchSuggestionParser::ExtractJsonData(
284 const net::URLFetcher* source) {
285 const net::HttpResponseHeaders* const response_headers =
286 source->GetResponseHeaders();
287 std::string json_data;
288 source->GetResponseAsString(&json_data);
289
290 // JSON is supposed to be UTF-8, but some suggest service providers send
291 // JSON files in non-UTF-8 encodings. The actual encoding is usually
292 // specified in the Content-Type header field.
293 if (response_headers) {
294 std::string charset;
295 if (response_headers->GetCharset(&charset)) {
296 base::string16 data_16;
297 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
298 if (base::CodepageToUTF16(json_data, charset.c_str(),
299 base::OnStringConversionError::FAIL,
300 &data_16))
301 json_data = base::UTF16ToUTF8(data_16);
302 }
303 }
304 return json_data;
305 }
306
307 // static
DeserializeJsonData(std::string json_data)308 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
309 std::string json_data) {
310 // The JSON response should be an array.
311 for (size_t response_start_index = json_data.find("["), i = 0;
312 response_start_index != std::string::npos && i < 5;
313 response_start_index = json_data.find("[", 1), i++) {
314 // Remove any XSSI guards to allow for JSON parsing.
315 if (response_start_index > 0)
316 json_data.erase(0, response_start_index);
317
318 JSONStringValueSerializer deserializer(json_data);
319 deserializer.set_allow_trailing_comma(true);
320 int error_code = 0;
321 scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL));
322 if (error_code == 0)
323 return data.Pass();
324 }
325 return scoped_ptr<base::Value>();
326 }
327
328 // static
ParseSuggestResults(const base::Value & root_val,const AutocompleteInput & input,const AutocompleteSchemeClassifier & scheme_classifier,int default_result_relevance,const std::string & languages,bool is_keyword_result,Results * results)329 bool SearchSuggestionParser::ParseSuggestResults(
330 const base::Value& root_val,
331 const AutocompleteInput& input,
332 const AutocompleteSchemeClassifier& scheme_classifier,
333 int default_result_relevance,
334 const std::string& languages,
335 bool is_keyword_result,
336 Results* results) {
337 base::string16 query;
338 const base::ListValue* root_list = NULL;
339 const base::ListValue* results_list = NULL;
340
341 if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
342 query != input.text() || !root_list->GetList(1, &results_list))
343 return false;
344
345 // 3rd element: Description list.
346 const base::ListValue* descriptions = NULL;
347 root_list->GetList(2, &descriptions);
348
349 // 4th element: Disregard the query URL list for now.
350
351 // Reset suggested relevance information.
352 results->verbatim_relevance = -1;
353
354 // 5th element: Optional key-value pairs from the Suggest server.
355 const base::ListValue* types = NULL;
356 const base::ListValue* relevances = NULL;
357 const base::ListValue* suggestion_details = NULL;
358 const base::DictionaryValue* extras = NULL;
359 int prefetch_index = -1;
360 if (root_list->GetDictionary(4, &extras)) {
361 extras->GetList("google:suggesttype", &types);
362
363 // Discard this list if its size does not match that of the suggestions.
364 if (extras->GetList("google:suggestrelevance", &relevances) &&
365 (relevances->GetSize() != results_list->GetSize()))
366 relevances = NULL;
367 extras->GetInteger("google:verbatimrelevance",
368 &results->verbatim_relevance);
369
370 // Check if the active suggest field trial (if any) has triggered either
371 // for the default provider or keyword provider.
372 results->field_trial_triggered = false;
373 extras->GetBoolean("google:fieldtrialtriggered",
374 &results->field_trial_triggered);
375
376 const base::DictionaryValue* client_data = NULL;
377 if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
378 client_data->GetInteger("phi", &prefetch_index);
379
380 if (extras->GetList("google:suggestdetail", &suggestion_details) &&
381 suggestion_details->GetSize() != results_list->GetSize())
382 suggestion_details = NULL;
383
384 // Store the metadata that came with the response in case we need to pass it
385 // along with the prefetch query to Instant.
386 JSONStringValueSerializer json_serializer(&results->metadata);
387 json_serializer.Serialize(*extras);
388 }
389
390 // Clear the previous results now that new results are available.
391 results->suggest_results.clear();
392 results->navigation_results.clear();
393 results->answers_image_urls.clear();
394
395 base::string16 suggestion;
396 std::string type;
397 int relevance = default_result_relevance;
398 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
399 const bool allow_navsuggest =
400 input.type() != metrics::OmniboxInputType::FORCED_QUERY;
401 const base::string16& trimmed_input =
402 base::CollapseWhitespace(input.text(), false);
403 for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
404 // Google search may return empty suggestions for weird input characters,
405 // they make no sense at all and can cause problems in our code.
406 if (suggestion.empty())
407 continue;
408
409 // Apply valid suggested relevance scores; discard invalid lists.
410 if (relevances != NULL && !relevances->GetInteger(index, &relevance))
411 relevances = NULL;
412 AutocompleteMatchType::Type match_type =
413 AutocompleteMatchType::SEARCH_SUGGEST;
414 if (types && types->GetString(index, &type))
415 match_type = GetAutocompleteMatchType(type);
416 const base::DictionaryValue* suggestion_detail = NULL;
417 std::string deletion_url;
418
419 if (suggestion_details &&
420 suggestion_details->GetDictionary(index, &suggestion_detail))
421 suggestion_detail->GetString("du", &deletion_url);
422
423 if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
424 (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
425 // Do not blindly trust the URL coming from the server to be valid.
426 GURL url(
427 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string()));
428 if (url.is_valid() && allow_navsuggest) {
429 base::string16 title;
430 if (descriptions != NULL)
431 descriptions->GetString(index, &title);
432 results->navigation_results.push_back(NavigationResult(
433 scheme_classifier, url, match_type, title, deletion_url,
434 is_keyword_result, relevance, relevances != NULL, input.text(),
435 languages));
436 }
437 } else {
438 base::string16 match_contents = suggestion;
439 base::string16 match_contents_prefix;
440 base::string16 annotation;
441 base::string16 answer_contents;
442 base::string16 answer_type;
443 std::string suggest_query_params;
444
445 if (suggestion_details) {
446 suggestion_details->GetDictionary(index, &suggestion_detail);
447 if (suggestion_detail) {
448 suggestion_detail->GetString("t", &match_contents);
449 suggestion_detail->GetString("mp", &match_contents_prefix);
450 // Error correction for bad data from server.
451 if (match_contents.empty())
452 match_contents = suggestion;
453 suggestion_detail->GetString("a", &annotation);
454 suggestion_detail->GetString("q", &suggest_query_params);
455
456 // Extract Answers, if provided.
457 const base::DictionaryValue* answer_json = NULL;
458 if (suggestion_detail->GetDictionary("ansa", &answer_json)) {
459 match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER;
460 GetAnswersImageURLs(answer_json, &results->answers_image_urls);
461 std::string contents;
462 base::JSONWriter::Write(answer_json, &contents);
463 answer_contents = base::UTF8ToUTF16(contents);
464 suggestion_detail->GetString("ansb", &answer_type);
465 }
466 }
467 }
468
469 bool should_prefetch = static_cast<int>(index) == prefetch_index;
470 // TODO(kochi): Improve calculator suggestion presentation.
471 results->suggest_results.push_back(SuggestResult(
472 base::CollapseWhitespace(suggestion, false), match_type,
473 base::CollapseWhitespace(match_contents, false),
474 match_contents_prefix, annotation, answer_contents, answer_type,
475 suggest_query_params, deletion_url, is_keyword_result, relevance,
476 relevances != NULL, should_prefetch, trimmed_input));
477 }
478 }
479 results->relevances_from_server = relevances != NULL;
480 return true;
481 }
482
483 // static
GetAnswersImageURLs(const base::DictionaryValue * answer_json,std::vector<GURL> * urls)484 void SearchSuggestionParser::GetAnswersImageURLs(
485 const base::DictionaryValue* answer_json,
486 std::vector<GURL>* urls) {
487 DCHECK(answer_json);
488
489 const base::ListValue* lines = NULL;
490 if (!answer_json->GetList("l", &lines) || !lines || lines->GetSize() == 0)
491 return;
492
493 for (base::ListValue::const_iterator iter = lines->begin();
494 iter != lines->end();
495 ++iter) {
496 const base::DictionaryValue* line = NULL;
497 if (!(*iter)->GetAsDictionary(&line) || !line)
498 continue;
499
500 std::string image_host_and_path;
501 if (!line->GetString("il.i.d", &image_host_and_path) ||
502 image_host_and_path.empty())
503 continue;
504 // Concatenate scheme and host/path using only ':' as separator. This is
505 // due to the results delivering strings of the form '//host/path', which
506 // is web-speak for "use the enclosing page's scheme", but not a valid path
507 // of an URL.
508 GURL image_url(
509 GURL(std::string(url::kHttpsScheme) + ":" + image_host_and_path));
510 if (image_url.is_valid())
511 urls->push_back(image_url);
512 }
513 }
514