1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/omnibox/keyword_provider.h"
6
7 #include <algorithm>
8 #include <vector>
9
10 #include "base/strings/string16.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "components/metrics/proto/omnibox_input_type.pb.h"
14 #include "components/omnibox/autocomplete_match.h"
15 #include "components/omnibox/autocomplete_provider_listener.h"
16 #include "components/omnibox/keyword_extensions_delegate.h"
17 #include "components/search_engines/template_url.h"
18 #include "components/search_engines/template_url_service.h"
19 #include "grit/components_strings.h"
20 #include "net/base/escape.h"
21 #include "net/base/net_util.h"
22 #include "ui/base/l10n/l10n_util.h"
23
24 namespace {
25
26 // Helper functor for Start(), for sorting keyword matches by quality.
27 class CompareQuality {
28 public:
29 // A keyword is of higher quality when a greater fraction of it has been
30 // typed, that is, when it is shorter.
31 //
32 // TODO(pkasting): Most recent and most frequent keywords are probably
33 // better rankings than the fraction of the keyword typed. We should
34 // always put any exact matches first no matter what, since the code in
35 // Start() assumes this (and it makes sense).
operator ()(const TemplateURL * t_url1,const TemplateURL * t_url2) const36 bool operator()(const TemplateURL* t_url1, const TemplateURL* t_url2) const {
37 return t_url1->keyword().length() < t_url2->keyword().length();
38 }
39 };
40
41 // Helper for KeywordProvider::Start(), for ending keyword mode unless
42 // explicitly told otherwise.
43 class ScopedEndExtensionKeywordMode {
44 public:
45 explicit ScopedEndExtensionKeywordMode(KeywordExtensionsDelegate* delegate);
46 ~ScopedEndExtensionKeywordMode();
47
48 void StayInKeywordMode();
49
50 private:
51 KeywordExtensionsDelegate* delegate_;
52
53 DISALLOW_COPY_AND_ASSIGN(ScopedEndExtensionKeywordMode);
54 };
55
ScopedEndExtensionKeywordMode(KeywordExtensionsDelegate * delegate)56 ScopedEndExtensionKeywordMode::ScopedEndExtensionKeywordMode(
57 KeywordExtensionsDelegate* delegate)
58 : delegate_(delegate) {
59 }
60
~ScopedEndExtensionKeywordMode()61 ScopedEndExtensionKeywordMode::~ScopedEndExtensionKeywordMode() {
62 if (delegate_)
63 delegate_->MaybeEndExtensionKeywordMode();
64 }
65
StayInKeywordMode()66 void ScopedEndExtensionKeywordMode::StayInKeywordMode() {
67 delegate_ = NULL;
68 }
69
70 } // namespace
71
KeywordProvider(AutocompleteProviderListener * listener,TemplateURLService * model)72 KeywordProvider::KeywordProvider(
73 AutocompleteProviderListener* listener,
74 TemplateURLService* model)
75 : AutocompleteProvider(AutocompleteProvider::TYPE_KEYWORD),
76 listener_(listener),
77 model_(model) {
78 }
79
80 // static
// Splits |input| at its first whitespace character and returns the leading
// token (the candidate keyword).
//
// If |input| contains no whitespace, the whole of |input| is returned and
// |remaining_input| is not touched (it is not even required to be non-NULL
// in that case).  Otherwise |remaining_input| must be non-NULL; it receives
// the text following the keyword, starting either at the first
// non-whitespace character after the keyword (|trim_leading_whitespace| set)
// or immediately after the single separating whitespace character (unset).
// When nothing follows the keyword, |remaining_input| is left unchanged.
base::string16 KeywordProvider::SplitKeywordFromInput(
    const base::string16& input,
    bool trim_leading_whitespace,
    base::string16* remaining_input) {
  // The AutocompleteController has already trimmed leading whitespace, so the
  // first token starts at offset 0 and ends at the first whitespace.
  const size_t keyword_end = input.find_first_of(base::kWhitespaceUTF16);
  DCHECK_NE(0U, keyword_end);
  if (keyword_end == base::string16::npos)
    return input;  // A single token: all of it is the keyword.

  // Work out where the text after the keyword begins.
  DCHECK(remaining_input != NULL);
  size_t remainder_begin = keyword_end + 1;
  if (trim_leading_whitespace) {
    remainder_begin =
        input.find_first_not_of(base::kWhitespaceUTF16, keyword_end);
  }

  // |remainder_begin| is npos (or equals the length) when only whitespace
  // follows the keyword; both fail this test and leave the output alone.
  if (remainder_begin < input.length())
    *remaining_input = input.substr(remainder_begin);

  // Everything before the first whitespace is the keyword.
  return input.substr(0, keyword_end);
}
104
105 // static
// Returns the part of |input| that follows its first token, i.e. the text
// that would be substituted into a keyword's URL.  Leading whitespace in
// |input| is ignored.  |trim_leading_whitespace| is forwarded to
// SplitKeywordFromInput() and controls whether whitespace between the keyword
// and the replacement text is dropped.  The result is empty when |input| has
// no text after its first token.
base::string16 KeywordProvider::SplitReplacementStringFromInput(
    const base::string16& input,
    bool trim_leading_whitespace) {
  // SplitKeywordFromInput() expects input without leading whitespace.
  base::string16 without_leading_space;
  base::TrimWhitespace(input, base::TRIM_LEADING, &without_leading_space);

  // Only the remainder is of interest; the keyword token itself is discarded.
  base::string16 replacement;
  SplitKeywordFromInput(without_leading_space, trim_leading_whitespace,
                        &replacement);
  return replacement;
}
119
120 // static
GetSubstitutingTemplateURLForInput(TemplateURLService * model,AutocompleteInput * input)121 const TemplateURL* KeywordProvider::GetSubstitutingTemplateURLForInput(
122 TemplateURLService* model,
123 AutocompleteInput* input) {
124 if (!input->allow_exact_keyword_match())
125 return NULL;
126
127 base::string16 keyword, remaining_input;
128 if (!ExtractKeywordFromInput(*input, &keyword, &remaining_input))
129 return NULL;
130
131 DCHECK(model);
132 const TemplateURL* template_url = model->GetTemplateURLForKeyword(keyword);
133 if (template_url &&
134 template_url->SupportsReplacement(model->search_terms_data())) {
135 // Adjust cursor position iff it was set before, otherwise leave it as is.
136 size_t cursor_position = base::string16::npos;
137 // The adjustment assumes that the keyword was stripped from the beginning
138 // of the original input.
139 if (input->cursor_position() != base::string16::npos &&
140 !remaining_input.empty() &&
141 EndsWith(input->text(), remaining_input, true)) {
142 int offset = input->text().length() - input->cursor_position();
143 // The cursor should never be past the last character or before the
144 // first character.
145 DCHECK_GE(offset, 0);
146 DCHECK_LE(offset, static_cast<int>(input->text().length()));
147 if (offset <= 0) {
148 // Normalize the cursor to be exactly after the last character.
149 cursor_position = remaining_input.length();
150 } else {
151 // If somehow the cursor was before the remaining text, set it to 0,
152 // otherwise adjust it relative to the remaining text.
153 cursor_position = offset > static_cast<int>(remaining_input.length()) ?
154 0u : remaining_input.length() - offset;
155 }
156 }
157 input->UpdateText(remaining_input, cursor_position, input->parts());
158 return template_url;
159 }
160
161 return NULL;
162 }
163
// Returns the cleaned-up form of |text| if it is a usable keyword, or an
// empty string otherwise.  A keyword is usable when the TemplateURLService
// knows a TemplateURL for it, that TemplateURL supports replacement, and, for
// extension keywords while an extensions delegate is present, the owning
// extension is enabled.
base::string16 KeywordProvider::GetKeywordForText(
    const base::string16& text) const {
  const base::string16 cleaned_keyword(
      TemplateURLService::CleanUserInputKeyword(text));
  if (cleaned_keyword.empty())
    return base::string16();

  TemplateURLService* const service = GetTemplateURLService();
  if (!service)
    return base::string16();

  // A keyword that does not support replacement is never offered.
  const TemplateURL* const template_url =
      service->GetTemplateURLForKeyword(cleaned_keyword);
  const bool substitutes =
      template_url &&
      template_url->SupportsReplacement(service->search_terms_data());
  if (!substitutes)
    return base::string16();

  // Neither is a keyword belonging to an inactive/disabled extension.
  const bool is_extension_keyword =
      template_url->GetType() == TemplateURL::OMNIBOX_API_EXTENSION;
  if (is_extension_keyword && extensions_delegate_ &&
      !extensions_delegate_->IsEnabledExtension(
          template_url->GetExtensionId())) {
    return base::string16();
  }

  return cleaned_keyword;
}
190
CreateVerbatimMatch(const base::string16 & text,const base::string16 & keyword,const AutocompleteInput & input)191 AutocompleteMatch KeywordProvider::CreateVerbatimMatch(
192 const base::string16& text,
193 const base::string16& keyword,
194 const AutocompleteInput& input) {
195 // A verbatim match is allowed to be the default match.
196 return CreateAutocompleteMatch(
197 GetTemplateURLService()->GetTemplateURLForKeyword(keyword), input,
198 keyword.length(), SplitReplacementStringFromInput(text, true), true, 0);
199 }
200
Start(const AutocompleteInput & input,bool minimal_changes)201 void KeywordProvider::Start(const AutocompleteInput& input,
202 bool minimal_changes) {
203 // This object ensures we end keyword mode if we exit the function without
204 // toggling keyword mode to on.
205 ScopedEndExtensionKeywordMode keyword_mode_toggle(extensions_delegate_.get());
206
207 matches_.clear();
208
209 if (!minimal_changes) {
210 done_ = true;
211
212 // Input has changed. Increment the input ID so that we can discard any
213 // stale extension suggestions that may be incoming.
214 if (extensions_delegate_)
215 extensions_delegate_->IncrementInputId();
216 }
217
218 // Split user input into a keyword and some query input.
219 //
220 // We want to suggest keywords even when users have started typing URLs, on
221 // the assumption that they might not realize they no longer need to go to a
222 // site to be able to search it. So we call CleanUserInputKeyword() to strip
223 // any initial scheme and/or "www.". NOTE: Any heuristics or UI used to
224 // automatically/manually create keywords will need to be in sync with
225 // whatever we do here!
226 //
227 // TODO(pkasting): http://crbug/347744 If someday we remember usage frequency
228 // for keywords, we might suggest keywords that haven't even been partially
229 // typed, if the user uses them enough and isn't obviously typing something
230 // else. In this case we'd consider all input here to be query input.
231 base::string16 keyword, remaining_input;
232 if (!ExtractKeywordFromInput(input, &keyword, &remaining_input))
233 return;
234
235 // Get the best matches for this keyword.
236 //
237 // NOTE: We could cache the previous keywords and reuse them here in the
238 // |minimal_changes| case, but since we'd still have to recalculate their
239 // relevances and we can just recreate the results synchronously anyway, we
240 // don't bother.
241 TemplateURLService::TemplateURLVector matches;
242 GetTemplateURLService()->FindMatchingKeywords(
243 keyword, !remaining_input.empty(), &matches);
244
245 for (TemplateURLService::TemplateURLVector::iterator i(matches.begin());
246 i != matches.end(); ) {
247 const TemplateURL* template_url = *i;
248
249 // Prune any extension keywords that are disallowed in incognito mode (if
250 // we're incognito), or disabled.
251 if (template_url->GetType() == TemplateURL::OMNIBOX_API_EXTENSION &&
252 extensions_delegate_ &&
253 !extensions_delegate_->IsEnabledExtension(
254 template_url->GetExtensionId())) {
255 i = matches.erase(i);
256 continue;
257 }
258
259 // Prune any substituting keywords if there is no substitution.
260 if (template_url->SupportsReplacement(
261 GetTemplateURLService()->search_terms_data()) &&
262 remaining_input.empty() &&
263 !input.allow_exact_keyword_match()) {
264 i = matches.erase(i);
265 continue;
266 }
267
268 ++i;
269 }
270 if (matches.empty())
271 return;
272 std::sort(matches.begin(), matches.end(), CompareQuality());
273
274 // Limit to one exact or three inexact matches, and mark them up for display
275 // in the autocomplete popup.
276 // Any exact match is going to be the highest quality match, and thus at the
277 // front of our vector.
278 if (matches.front()->keyword() == keyword) {
279 const TemplateURL* template_url = matches.front();
280 const bool is_extension_keyword =
281 template_url->GetType() == TemplateURL::OMNIBOX_API_EXTENSION;
282
283 // Only create an exact match if |remaining_input| is empty or if
284 // this is an extension keyword. If |remaining_input| is a
285 // non-empty non-extension keyword (i.e., a regular keyword that
286 // supports replacement and that has extra text following it),
287 // then SearchProvider creates the exact (a.k.a. verbatim) match.
288 if (!remaining_input.empty() && !is_extension_keyword)
289 return;
290
291 // TODO(pkasting): We should probably check that if the user explicitly
292 // typed a scheme, that scheme matches the one in |template_url|.
293
294 // When creating an exact match (either for the keyword itself, no
295 // remaining query or an extension keyword, possibly with remaining
296 // input), allow the match to be the default match.
297 matches_.push_back(CreateAutocompleteMatch(
298 template_url, input, keyword.length(), remaining_input, true, -1));
299
300 if (is_extension_keyword && extensions_delegate_) {
301 if (extensions_delegate_->Start(input, minimal_changes, template_url,
302 remaining_input))
303 keyword_mode_toggle.StayInKeywordMode();
304 }
305 } else {
306 if (matches.size() > kMaxMatches)
307 matches.erase(matches.begin() + kMaxMatches, matches.end());
308 for (TemplateURLService::TemplateURLVector::const_iterator i(
309 matches.begin()); i != matches.end(); ++i) {
310 matches_.push_back(CreateAutocompleteMatch(
311 *i, input, keyword.length(), remaining_input, false, -1));
312 }
313 }
314 }
315
Stop(bool clear_cached_results)316 void KeywordProvider::Stop(bool clear_cached_results) {
317 done_ = true;
318 if (extensions_delegate_)
319 extensions_delegate_->MaybeEndExtensionKeywordMode();
320 }
321
~KeywordProvider()322 KeywordProvider::~KeywordProvider() {}
323
324 // static
ExtractKeywordFromInput(const AutocompleteInput & input,base::string16 * keyword,base::string16 * remaining_input)325 bool KeywordProvider::ExtractKeywordFromInput(const AutocompleteInput& input,
326 base::string16* keyword,
327 base::string16* remaining_input) {
328 if ((input.type() == metrics::OmniboxInputType::INVALID) ||
329 (input.type() == metrics::OmniboxInputType::FORCED_QUERY))
330 return false;
331
332 *keyword = TemplateURLService::CleanUserInputKeyword(
333 SplitKeywordFromInput(input.text(), true, remaining_input));
334 return !keyword->empty();
335 }
336
337 // static
CalculateRelevance(metrics::OmniboxInputType::Type type,bool complete,bool supports_replacement,bool prefer_keyword,bool allow_exact_keyword_match)338 int KeywordProvider::CalculateRelevance(metrics::OmniboxInputType::Type type,
339 bool complete,
340 bool supports_replacement,
341 bool prefer_keyword,
342 bool allow_exact_keyword_match) {
343 // This function is responsible for scoring suggestions of keywords
344 // themselves and the suggestion of the verbatim query on an
345 // extension keyword. SearchProvider::CalculateRelevanceForKeywordVerbatim()
346 // scores verbatim query suggestions for non-extension keywords.
347 // These two functions are currently in sync, but there's no reason
348 // we couldn't decide in the future to score verbatim matches
349 // differently for extension and non-extension keywords. If you
350 // make such a change, however, you should update this comment to
351 // describe it, so it's clear why the functions diverge.
352 if (!complete)
353 return (type == metrics::OmniboxInputType::URL) ? 700 : 450;
354 if (!supports_replacement || (allow_exact_keyword_match && prefer_keyword))
355 return 1500;
356 return (allow_exact_keyword_match &&
357 (type == metrics::OmniboxInputType::QUERY)) ?
358 1450 : 1100;
359 }
360
CreateAutocompleteMatch(const TemplateURL * template_url,const AutocompleteInput & input,size_t prefix_length,const base::string16 & remaining_input,bool allowed_to_be_default_match,int relevance)361 AutocompleteMatch KeywordProvider::CreateAutocompleteMatch(
362 const TemplateURL* template_url,
363 const AutocompleteInput& input,
364 size_t prefix_length,
365 const base::string16& remaining_input,
366 bool allowed_to_be_default_match,
367 int relevance) {
368 DCHECK(template_url);
369 const bool supports_replacement =
370 template_url->url_ref().SupportsReplacement(
371 GetTemplateURLService()->search_terms_data());
372
373 // Create an edit entry of "[keyword] [remaining input]". This is helpful
374 // even when [remaining input] is empty, as the user can select the popup
375 // choice and immediately begin typing in query input.
376 const base::string16& keyword = template_url->keyword();
377 const bool keyword_complete = (prefix_length == keyword.length());
378 if (relevance < 0) {
379 relevance =
380 CalculateRelevance(input.type(), keyword_complete,
381 // When the user wants keyword matches to take
382 // preference, score them highly regardless of
383 // whether the input provides query text.
384 supports_replacement, input.prefer_keyword(),
385 input.allow_exact_keyword_match());
386 }
387 AutocompleteMatch match(this, relevance, false,
388 supports_replacement ? AutocompleteMatchType::SEARCH_OTHER_ENGINE :
389 AutocompleteMatchType::HISTORY_KEYWORD);
390 match.allowed_to_be_default_match = allowed_to_be_default_match;
391 match.fill_into_edit = keyword;
392 if (!remaining_input.empty() || supports_replacement)
393 match.fill_into_edit.push_back(L' ');
394 match.fill_into_edit.append(remaining_input);
395 // If we wanted to set |result.inline_autocompletion| correctly, we'd need
396 // CleanUserInputKeyword() to return the amount of adjustment it's made to
397 // the user's input. Because right now inexact keyword matches can't score
398 // more highly than a "what you typed" match from one of the other providers,
399 // we just don't bother to do this, and leave inline autocompletion off.
400
401 // Create destination URL and popup entry content by substituting user input
402 // into keyword templates.
403 FillInURLAndContents(remaining_input, template_url, &match);
404
405 match.keyword = keyword;
406 match.transition = ui::PAGE_TRANSITION_KEYWORD;
407
408 return match;
409 }
410
FillInURLAndContents(const base::string16 & remaining_input,const TemplateURL * element,AutocompleteMatch * match) const411 void KeywordProvider::FillInURLAndContents(
412 const base::string16& remaining_input,
413 const TemplateURL* element,
414 AutocompleteMatch* match) const {
415 DCHECK(!element->short_name().empty());
416 const TemplateURLRef& element_ref = element->url_ref();
417 DCHECK(element_ref.IsValid(GetTemplateURLService()->search_terms_data()));
418 int message_id = (element->GetType() == TemplateURL::OMNIBOX_API_EXTENSION) ?
419 IDS_EXTENSION_KEYWORD_COMMAND : IDS_KEYWORD_SEARCH;
420 if (remaining_input.empty()) {
421 // Allow extension keyword providers to accept empty string input. This is
422 // useful to allow extensions to do something in the case where no input is
423 // entered.
424 if (element_ref.SupportsReplacement(
425 GetTemplateURLService()->search_terms_data()) &&
426 (element->GetType() != TemplateURL::OMNIBOX_API_EXTENSION)) {
427 // No query input; return a generic, no-destination placeholder.
428 match->contents.assign(
429 l10n_util::GetStringFUTF16(message_id,
430 element->AdjustedShortNameForLocaleDirection(),
431 l10n_util::GetStringUTF16(IDS_EMPTY_KEYWORD_VALUE)));
432 match->contents_class.push_back(
433 ACMatchClassification(0, ACMatchClassification::DIM));
434 } else {
435 // Keyword that has no replacement text (aka a shorthand for a URL).
436 match->destination_url = GURL(element->url());
437 match->contents.assign(element->short_name());
438 AutocompleteMatch::ClassifyLocationInString(0, match->contents.length(),
439 match->contents.length(), ACMatchClassification::NONE,
440 &match->contents_class);
441 }
442 } else {
443 // Create destination URL by escaping user input and substituting into
444 // keyword template URL. The escaping here handles whitespace in user
445 // input, but we rely on later canonicalization functions to do more
446 // fixup to make the URL valid if necessary.
447 DCHECK(element_ref.SupportsReplacement(
448 GetTemplateURLService()->search_terms_data()));
449 TemplateURLRef::SearchTermsArgs search_terms_args(remaining_input);
450 search_terms_args.append_extra_query_params =
451 element == GetTemplateURLService()->GetDefaultSearchProvider();
452 match->destination_url = GURL(element_ref.ReplaceSearchTerms(
453 search_terms_args, GetTemplateURLService()->search_terms_data()));
454 std::vector<size_t> content_param_offsets;
455 match->contents.assign(l10n_util::GetStringFUTF16(message_id,
456 element->short_name(),
457 remaining_input,
458 &content_param_offsets));
459 DCHECK_EQ(2U, content_param_offsets.size());
460 AutocompleteMatch::ClassifyLocationInString(content_param_offsets[1],
461 remaining_input.length(), match->contents.length(),
462 ACMatchClassification::NONE, &match->contents_class);
463 }
464 }
465
GetTemplateURLService() const466 TemplateURLService* KeywordProvider::GetTemplateURLService() const {
467 // Make sure the model is loaded. This is cheap and quickly bails out if
468 // the model is already loaded.
469 model_->Load();
470 return model_;
471 }
472