• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/search_engines/template_url.h"
6 
7 #include <string>
8 #include <vector>
9 
10 #include "base/basictypes.h"
11 #include "base/command_line.h"
12 #include "base/format_macros.h"
13 #include "base/i18n/icu_string_conversions.h"
14 #include "base/i18n/rtl.h"
15 #include "base/logging.h"
16 #include "base/metrics/field_trial.h"
17 #include "base/rand_util.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/string_split.h"
20 #include "base/strings/string_util.h"
21 #include "base/strings/stringprintf.h"
22 #include "base/strings/utf_string_conversions.h"
23 #include "chrome/browser/omnibox/omnibox_field_trial.h"
24 #include "chrome/browser/search/search.h"
25 #include "chrome/common/chrome_switches.h"
26 #include "components/google/core/browser/google_util.h"
27 #include "components/metrics/proto/omnibox_input_type.pb.h"
28 #include "components/search_engines/search_terms_data.h"
29 #include "extensions/common/constants.h"
30 #include "google_apis/google_api_keys.h"
31 #include "net/base/escape.h"
32 #include "net/base/mime_util.h"
33 #include "net/base/net_util.h"
34 #include "ui/base/l10n/l10n_util.h"
35 
36 namespace {
37 
38 // The TemplateURLRef has any number of terms that need to be replaced. Each of
39 // the terms is enclosed in braces. If the character preceding the final
40 // brace is a ?, it indicates the term is optional and can be replaced with
41 // an empty string.
// Delimiters of a template parameter. ParseURL() searches for the two braces
// and ParseParameter() treats kOptional immediately before the closing brace
// as the "optional" marker.
42 const char kStartParameter = '{';
43 const char kEndParameter = '}';
44 const char kOptional = '?';
45 
46 // Known parameters found in the URL.
// ParseParameter() compares these names against the text between the braces
// (with any trailing kOptional removed). The *Full variants include the
// braces and are matched against raw template text instead.
47 const char kSearchTermsParameter[] = "searchTerms";
48 const char kSearchTermsParameterFull[] = "{searchTerms}";
49 const char kCountParameter[] = "count";
50 const char kStartIndexParameter[] = "startIndex";
51 const char kStartPageParameter[] = "startPage";
52 const char kLanguageParameter[] = "language";
53 const char kInputEncodingParameter[] = "inputEncoding";
54 const char kOutputEncodingParameter[] = "outputEncoding";
55 
56 const char kGoogleAssistedQueryStatsParameter[] = "google:assistedQueryStats";
57 
58 // Host/Domain Google searches are relative to.
59 const char kGoogleBaseURLParameter[] = "google:baseURL";
60 const char kGoogleBaseURLParameterFull[] = "{google:baseURL}";
61 
62 // Like google:baseURL, but for the Search Suggest capability.
63 const char kGoogleBaseSuggestURLParameter[] = "google:baseSuggestURL";
64 const char kGoogleBaseSuggestURLParameterFull[] = "{google:baseSuggestURL}";
65 const char kGoogleBookmarkBarPinnedParameter[] = "google:bookmarkBarPinned";
66 const char kGoogleContextualSearchContextData[] =
67     "google:contextualSearchContextData";
68 const char kGoogleContextualSearchVersion[] = "google:contextualSearchVersion";
69 const char kGoogleCurrentPageUrlParameter[] = "google:currentPageUrl";
70 const char kGoogleCursorPositionParameter[] = "google:cursorPosition";
71 const char kGoogleForceInstantResultsParameter[] = "google:forceInstantResults";
72 const char kGoogleImageSearchSource[] = "google:imageSearchSource";
73 const char kGoogleImageThumbnailParameter[] = "google:imageThumbnail";
74 const char kGoogleImageOriginalWidth[] = "google:imageOriginalWidth";
75 const char kGoogleImageOriginalHeight[] = "google:imageOriginalHeight";
76 const char kGoogleImageURLParameter[] = "google:imageURL";
77 const char kGoogleInputTypeParameter[] = "google:inputType";
78 const char kGoogleInstantExtendedEnabledParameter[] =
79     "google:instantExtendedEnabledParameter";
80 const char kGoogleInstantExtendedEnabledKey[] =
81     "google:instantExtendedEnabledKey";
82 const char kGoogleInstantExtendedEnabledKeyFull[] =
83     "{google:instantExtendedEnabledKey}";
84 const char kGoogleNTPIsThemedParameter[] = "google:ntpIsThemedParameter";
85 const char kGoogleOmniboxStartMarginParameter[] =
86     "google:omniboxStartMarginParameter";
87 const char kGoogleOriginalQueryForSuggestionParameter[] =
88     "google:originalQueryForSuggestion";
89 const char kGooglePageClassificationParameter[] = "google:pageClassification";
90 const char kGoogleRLZParameter[] = "google:RLZ";
91 const char kGoogleSearchClient[] = "google:searchClient";
92 const char kGoogleSearchFieldtrialParameter[] =
93     "google:searchFieldtrialParameter";
94 const char kGoogleSearchVersion[] = "google:searchVersion";
95 const char kGoogleSessionToken[] = "google:sessionToken";
96 const char kGoogleSourceIdParameter[] = "google:sourceId";
97 const char kGoogleSuggestAPIKeyParameter[] = "google:suggestAPIKeyParameter";
98 const char kGoogleSuggestClient[] = "google:suggestClient";
99 const char kGoogleSuggestRequestId[] = "google:suggestRid";
100 
101 // Same as kSearchTermsParameter, with no escaping.
102 const char kGoogleUnescapedSearchTermsParameter[] =
103     "google:unescapedSearchTerms";
104 const char kGoogleUnescapedSearchTermsParameterFull[] =
105     "{google:unescapedSearchTerms}";
106 
107 // Display value for kSearchTermsParameter.
108 const char kDisplaySearchTerms[] = "%s";
109 
110 // Display value for kGoogleUnescapedSearchTermsParameter.
111 const char kDisplayUnescapedSearchTerms[] = "%S";
112 
113 // Used if the count parameter is not optional. Indicates we want 10 search
114 // results.
115 const char kDefaultCount[] = "10";
116 
117 // Used if the parameter kOutputEncodingParameter is required.
118 const char kOutputEncodingType[] = "UTF-8";
119 
120 // Attempts to encode |terms| and |original_query| in |encoding| and escape
121 // them.  |terms| may be escaped as path or query depending on |is_in_query|;
122 // |original_query| is always escaped as query.  Returns whether the encoding
123 // process succeeded.
TryEncoding(const base::string16 & terms,const base::string16 & original_query,const char * encoding,bool is_in_query,base::string16 * escaped_terms,base::string16 * escaped_original_query)124 bool TryEncoding(const base::string16& terms,
125                  const base::string16& original_query,
126                  const char* encoding,
127                  bool is_in_query,
128                  base::string16* escaped_terms,
129                  base::string16* escaped_original_query) {
130   DCHECK(escaped_terms);
131   DCHECK(escaped_original_query);
132   std::string encoded_terms;
133   if (!base::UTF16ToCodepage(terms, encoding,
134       base::OnStringConversionError::SKIP, &encoded_terms))
135     return false;
136   *escaped_terms = base::UTF8ToUTF16(is_in_query ?
137       net::EscapeQueryParamValue(encoded_terms, true) :
138       net::EscapePath(encoded_terms));
139   if (original_query.empty())
140     return true;
141   std::string encoded_original_query;
142   if (!base::UTF16ToCodepage(original_query, encoding,
143       base::OnStringConversionError::SKIP, &encoded_original_query))
144     return false;
145   *escaped_original_query = base::UTF8ToUTF16(
146       net::EscapeQueryParamValue(encoded_original_query, true));
147   return true;
148 }
149 
150 // Extract query key and host given a list of parameters coming from the URL
151 // query or ref.
FindSearchTermsKey(const std::string & params)152 std::string FindSearchTermsKey(const std::string& params) {
153   if (params.empty())
154     return std::string();
155   url::Component query, key, value;
156   query.len = static_cast<int>(params.size());
157   while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) {
158     if (key.is_nonempty() && value.is_nonempty()) {
159       std::string value_string = params.substr(value.begin, value.len);
160       if (value_string.find(kSearchTermsParameterFull, 0) !=
161           std::string::npos ||
162           value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
163           std::string::npos) {
164         return params.substr(key.begin, key.len);
165       }
166     }
167   }
168   return std::string();
169 }
170 
IsTemplateParameterString(const std::string & param)171 bool IsTemplateParameterString(const std::string& param) {
172   return (param.length() > 2) && (*(param.begin()) == kStartParameter) &&
173       (*(param.rbegin()) == kEndParameter);
174 }
175 
ShowingSearchTermsOnSRP()176 bool ShowingSearchTermsOnSRP() {
177   return chrome::IsInstantExtendedAPIEnabled() &&
178       chrome::IsQueryExtractionEnabled();
179 }
180 
181 }  // namespace
182 
183 
184 // TemplateURLRef::SearchTermsArgs --------------------------------------------
185 
SearchTermsArgs(const base::string16 & search_terms)186 TemplateURLRef::SearchTermsArgs::SearchTermsArgs(
187     const base::string16& search_terms)
188     : search_terms(search_terms),
189       input_type(metrics::OmniboxInputType::INVALID),
190       accepted_suggestion(NO_SUGGESTIONS_AVAILABLE),
191       cursor_position(base::string16::npos),
192       omnibox_start_margin(-1),
193       page_classification(metrics::OmniboxEventProto::INVALID_SPEC),
194       bookmark_bar_pinned(false),
195       append_extra_query_params(false),
196       force_instant_results(false),
197       from_app_list(false),
198       contextual_search_params(ContextualSearchParams()) {
199 }
200 
~SearchTermsArgs()201 TemplateURLRef::SearchTermsArgs::~SearchTermsArgs() {
202 }
203 
204 TemplateURLRef::SearchTermsArgs::ContextualSearchParams::
ContextualSearchParams()205     ContextualSearchParams()
206     : version(-1),
207       start(base::string16::npos),
208       end(base::string16::npos) {
209 }
210 
211 TemplateURLRef::SearchTermsArgs::ContextualSearchParams::
ContextualSearchParams(const int version,const size_t start,const size_t end,const std::string & selection,const std::string & content,const std::string & base_page_url,const std::string & encoding)212     ContextualSearchParams(
213         const int version,
214         const size_t start,
215         const size_t end,
216         const std::string& selection,
217         const std::string& content,
218         const std::string& base_page_url,
219         const std::string& encoding)
220     : version(version),
221       start(start),
222       end(end),
223       selection(selection),
224       content(content),
225       base_page_url(base_page_url),
226       encoding(encoding) {
227 }
228 
229 TemplateURLRef::SearchTermsArgs::ContextualSearchParams::
~ContextualSearchParams()230     ~ContextualSearchParams() {
231 }
232 
233 // TemplateURLRef -------------------------------------------------------------
234 
TemplateURLRef(TemplateURL * owner,Type type)235 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
236     : owner_(owner),
237       type_(type),
238       index_in_owner_(-1),
239       parsed_(false),
240       valid_(false),
241       supports_replacements_(false),
242       search_term_key_location_(url::Parsed::QUERY),
243       prepopulated_(false),
244       showing_search_terms_(ShowingSearchTermsOnSRP()) {
245   DCHECK(owner_);
246   DCHECK_NE(INDEXED, type_);
247 }
248 
TemplateURLRef(TemplateURL * owner,size_t index_in_owner)249 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
250     : owner_(owner),
251       type_(INDEXED),
252       index_in_owner_(index_in_owner),
253       parsed_(false),
254       valid_(false),
255       supports_replacements_(false),
256       search_term_key_location_(url::Parsed::QUERY),
257       prepopulated_(false),
258       showing_search_terms_(ShowingSearchTermsOnSRP()) {
259   DCHECK(owner_);
260   DCHECK_LT(index_in_owner_, owner_->URLCount());
261 }
262 
~TemplateURLRef()263 TemplateURLRef::~TemplateURLRef() {
264 }
265 
GetURL() const266 std::string TemplateURLRef::GetURL() const {
267   switch (type_) {
268     case SEARCH:            return owner_->url();
269     case SUGGEST:           return owner_->suggestions_url();
270     case INSTANT:           return owner_->instant_url();
271     case IMAGE:             return owner_->image_url();
272     case NEW_TAB:           return owner_->new_tab_url();
273     case CONTEXTUAL_SEARCH: return owner_->contextual_search_url();
274     case INDEXED:           return owner_->GetURL(index_in_owner_);
275     default:       NOTREACHED(); return std::string();  // NOLINT
276   }
277 }
278 
GetPostParamsString() const279 std::string TemplateURLRef::GetPostParamsString() const {
280   switch (type_) {
281     case INDEXED:
282     case SEARCH:            return owner_->search_url_post_params();
283     case SUGGEST:           return owner_->suggestions_url_post_params();
284     case INSTANT:           return owner_->instant_url_post_params();
285     case NEW_TAB:           return std::string();
286     case CONTEXTUAL_SEARCH: return std::string();
287     case IMAGE:             return owner_->image_url_post_params();
288     default:      NOTREACHED(); return std::string();  // NOLINT
289   }
290 }
291 
UsesPOSTMethod(const SearchTermsData & search_terms_data) const292 bool TemplateURLRef::UsesPOSTMethod(
293     const SearchTermsData& search_terms_data) const {
294   ParseIfNecessary(search_terms_data);
295   return !post_params_.empty();
296 }
297 
EncodeFormData(const PostParams & post_params,PostContent * post_content) const298 bool TemplateURLRef::EncodeFormData(const PostParams& post_params,
299                                     PostContent* post_content) const {
300   if (post_params.empty())
301     return true;
302   if (!post_content)
303     return false;
304 
305   const char kUploadDataMIMEType[] = "multipart/form-data; boundary=";
306   const char kMultipartBoundary[] = "----+*+----%016" PRIx64 "----+*+----";
307   // Each name/value pair is stored in a body part which is preceded by a
308   // boundary delimiter line. Uses random number generator here to create
309   // a unique boundary delimiter for form data encoding.
310   std::string boundary = base::StringPrintf(kMultipartBoundary,
311                                             base::RandUint64());
312   // Sets the content MIME type.
313   post_content->first = kUploadDataMIMEType;
314   post_content->first += boundary;
315   // Encodes the post parameters.
316   std::string* post_data = &post_content->second;
317   post_data->clear();
318   for (PostParams::const_iterator param = post_params.begin();
319        param != post_params.end(); ++param) {
320     DCHECK(!param->first.empty());
321     net::AddMultipartValueForUpload(param->first, param->second, boundary,
322                                     std::string(), post_data);
323   }
324   net::AddMultipartFinalDelimiterForUpload(boundary, post_data);
325   return true;
326 }
327 
SupportsReplacement(const SearchTermsData & search_terms_data) const328 bool TemplateURLRef::SupportsReplacement(
329     const SearchTermsData& search_terms_data) const {
330   ParseIfNecessary(search_terms_data);
331   return valid_ && supports_replacements_;
332 }
333 
ReplaceSearchTerms(const SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data,PostContent * post_content) const334 std::string TemplateURLRef::ReplaceSearchTerms(
335     const SearchTermsArgs& search_terms_args,
336     const SearchTermsData& search_terms_data,
337     PostContent* post_content) const {
338   ParseIfNecessary(search_terms_data);
339   if (!valid_)
340     return std::string();
341 
342   std::string url(HandleReplacements(search_terms_args, search_terms_data,
343                                      post_content));
344 
345   GURL gurl(url);
346   if (!gurl.is_valid())
347     return url;
348 
349   std::vector<std::string> query_params;
350   if (search_terms_args.append_extra_query_params) {
351     std::string extra_params(
352         CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
353             switches::kExtraSearchQueryParams));
354     if (!extra_params.empty())
355       query_params.push_back(extra_params);
356   }
357   if (!search_terms_args.suggest_query_params.empty())
358     query_params.push_back(search_terms_args.suggest_query_params);
359   if (!gurl.query().empty())
360     query_params.push_back(gurl.query());
361 
362   if (query_params.empty())
363     return url;
364 
365   GURL::Replacements replacements;
366   std::string query_str = JoinString(query_params, "&");
367   replacements.SetQueryStr(query_str);
368   return gurl.ReplaceComponents(replacements).possibly_invalid_spec();
369 }
370 
IsValid(const SearchTermsData & search_terms_data) const371 bool TemplateURLRef::IsValid(const SearchTermsData& search_terms_data) const {
372   ParseIfNecessary(search_terms_data);
373   return valid_;
374 }
375 
DisplayURL(const SearchTermsData & search_terms_data) const376 base::string16 TemplateURLRef::DisplayURL(
377     const SearchTermsData& search_terms_data) const {
378   ParseIfNecessary(search_terms_data);
379   base::string16 result(base::UTF8ToUTF16(GetURL()));
380   if (valid_ && !replacements_.empty()) {
381     ReplaceSubstringsAfterOffset(&result, 0,
382                                  base::ASCIIToUTF16(kSearchTermsParameterFull),
383                                  base::ASCIIToUTF16(kDisplaySearchTerms));
384     ReplaceSubstringsAfterOffset(&result, 0,
385         base::ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull),
386         base::ASCIIToUTF16(kDisplayUnescapedSearchTerms));
387   }
388   return result;
389 }
390 
391 // static
DisplayURLToURLRef(const base::string16 & display_url)392 std::string TemplateURLRef::DisplayURLToURLRef(
393     const base::string16& display_url) {
394   base::string16 result = display_url;
395   ReplaceSubstringsAfterOffset(&result, 0,
396                                base::ASCIIToUTF16(kDisplaySearchTerms),
397                                base::ASCIIToUTF16(kSearchTermsParameterFull));
398   ReplaceSubstringsAfterOffset(
399       &result, 0,
400       base::ASCIIToUTF16(kDisplayUnescapedSearchTerms),
401       base::ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull));
402   return base::UTF16ToUTF8(result);
403 }
404 
GetHost(const SearchTermsData & search_terms_data) const405 const std::string& TemplateURLRef::GetHost(
406     const SearchTermsData& search_terms_data) const {
407   ParseIfNecessary(search_terms_data);
408   return host_;
409 }
410 
GetPath(const SearchTermsData & search_terms_data) const411 const std::string& TemplateURLRef::GetPath(
412     const SearchTermsData& search_terms_data) const {
413   ParseIfNecessary(search_terms_data);
414   return path_;
415 }
416 
GetSearchTermKey(const SearchTermsData & search_terms_data) const417 const std::string& TemplateURLRef::GetSearchTermKey(
418     const SearchTermsData& search_terms_data) const {
419   ParseIfNecessary(search_terms_data);
420   return search_term_key_;
421 }
422 
SearchTermToString16(const std::string & term) const423 base::string16 TemplateURLRef::SearchTermToString16(
424     const std::string& term) const {
425   const std::vector<std::string>& encodings = owner_->input_encodings();
426   base::string16 result;
427 
428   std::string unescaped = net::UnescapeURLComponent(
429       term,
430       net::UnescapeRule::REPLACE_PLUS_WITH_SPACE |
431       net::UnescapeRule::URL_SPECIAL_CHARS);
432   for (size_t i = 0; i < encodings.size(); ++i) {
433     if (base::CodepageToUTF16(unescaped, encodings[i].c_str(),
434                               base::OnStringConversionError::FAIL, &result))
435       return result;
436   }
437 
438   // Always fall back on UTF-8 if it works.
439   if (base::CodepageToUTF16(unescaped, base::kCodepageUTF8,
440                             base::OnStringConversionError::FAIL, &result))
441     return result;
442 
443   // When nothing worked, just use the escaped text. We have no idea what the
444   // encoding is. We need to substitute spaces for pluses ourselves since we're
445   // not sending it through an unescaper.
446   result = base::UTF8ToUTF16(term);
447   std::replace(result.begin(), result.end(), '+', ' ');
448   return result;
449 }
450 
HasGoogleBaseURLs(const SearchTermsData & search_terms_data) const451 bool TemplateURLRef::HasGoogleBaseURLs(
452     const SearchTermsData& search_terms_data) const {
453   ParseIfNecessary(search_terms_data);
454   for (size_t i = 0; i < replacements_.size(); ++i) {
455     if ((replacements_[i].type == GOOGLE_BASE_URL) ||
456         (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
457       return true;
458   }
459   return false;
460 }
461 
ExtractSearchTermsFromURL(const GURL & url,base::string16 * search_terms,const SearchTermsData & search_terms_data,url::Parsed::ComponentType * search_terms_component,url::Component * search_terms_position) const462 bool TemplateURLRef::ExtractSearchTermsFromURL(
463     const GURL& url,
464     base::string16* search_terms,
465     const SearchTermsData& search_terms_data,
466     url::Parsed::ComponentType* search_terms_component,
467     url::Component* search_terms_position) const {
468   DCHECK(search_terms);
469   search_terms->clear();
470 
471   ParseIfNecessary(search_terms_data);
472 
473   // We need a search term in the template URL to extract something.
474   if (search_term_key_.empty())
475     return false;
476 
477   // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/
478   // See crbug.com/153798
479 
480   // Fill-in the replacements. We don't care about search terms in the pattern,
481   // so we use the empty string.
482   // Currently we assume the search term only shows in URL, not in post params.
483   GURL pattern(ReplaceSearchTerms(SearchTermsArgs(base::string16()),
484                                   search_terms_data, NULL));
485   // Host, path and port must match.
486   if (url.port() != pattern.port() ||
487       url.host() != host_ ||
488       url.path() != path_) {
489     return false;
490   }
491 
492   // Parameter must be present either in the query or the ref.
493   const std::string& params(
494       (search_term_key_location_ == url::Parsed::QUERY) ?
495           url.query() : url.ref());
496 
497   url::Component query, key, value;
498   query.len = static_cast<int>(params.size());
499   bool key_found = false;
500   while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) {
501     if (key.is_nonempty()) {
502       if (params.substr(key.begin, key.len) == search_term_key_) {
503         // Fail if search term key is found twice.
504         if (key_found) {
505           search_terms->clear();
506           return false;
507         }
508         key_found = true;
509         // Extract the search term.
510         *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
511             params.substr(value.begin, value.len),
512             net::UnescapeRule::SPACES |
513                 net::UnescapeRule::URL_SPECIAL_CHARS |
514                 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE);
515         if (search_terms_component)
516           *search_terms_component = search_term_key_location_;
517         if (search_terms_position)
518           *search_terms_position = value;
519       }
520     }
521   }
522   return key_found;
523 }
524 
InvalidateCachedValues() const525 void TemplateURLRef::InvalidateCachedValues() const {
526   supports_replacements_ = valid_ = parsed_ = false;
527   host_.clear();
528   path_.clear();
529   search_term_key_.clear();
530   replacements_.clear();
531   post_params_.clear();
532 }
533 
// Handles the single template parameter whose opening brace is at |start| and
// whose closing brace is at |end| in |*url|.
//
// The parameter text, braces included, is first erased from |*url|.  Then,
// depending on the parameter name:
//   - a constant is inserted at |start| (for some parameters only when the
//     parameter is not optional), or
//   - a Replacement recording the parameter type and the offset |start| is
//     appended to |*replacements|, to be filled in later, or
//   - for an unknown name, the original text is put back and false is
//     returned, unless |prepopulated_| is set, in which case the parameter
//     stays removed.
//
// Returns true in every case except the unknown, non-prepopulated one.
ParseParameter(size_t start,size_t end,std::string * url,Replacements * replacements) const534 bool TemplateURLRef::ParseParameter(size_t start,
535                                     size_t end,
536                                     std::string* url,
537                                     Replacements* replacements) const {
538   DCHECK(start != std::string::npos &&
539          end != std::string::npos && end > start);
// |length| is the number of characters strictly between the two braces.
540   size_t length = end - start - 1;
541   bool optional = false;
// A kOptional character right before the closing brace marks the parameter
// as optional and is not part of its name.
542   if ((*url)[end - 1] == kOptional) {
543     optional = true;
544     length--;
545   }
// |parameter| is the bare name; |full_parameter| keeps the braces (and any
// optional marker) so the text can be restored verbatim below.
546   std::string parameter(url->substr(start + 1, length));
547   std::string full_parameter(url->substr(start, end - start + 1));
548   // Remove the parameter from the string.  For parameters whose replacement is
549   // constant and already known, just replace them directly.  For other cases,
550   // like parameters whose values may change over time, use |replacements|.
551   url->erase(start, end - start + 1);
552   if (parameter == kSearchTermsParameter) {
553     replacements->push_back(Replacement(SEARCH_TERMS, start));
554   } else if (parameter == kCountParameter) {
// An optional count is simply dropped; a required one gets kDefaultCount.
555     if (!optional)
556       url->insert(start, kDefaultCount);
557   } else if (parameter == kGoogleAssistedQueryStatsParameter) {
558     replacements->push_back(Replacement(GOOGLE_ASSISTED_QUERY_STATS, start));
559   } else if (parameter == kGoogleBaseURLParameter) {
560     replacements->push_back(Replacement(GOOGLE_BASE_URL, start));
561   } else if (parameter == kGoogleBaseSuggestURLParameter) {
562     replacements->push_back(Replacement(GOOGLE_BASE_SUGGEST_URL, start));
563   } else if (parameter == kGoogleBookmarkBarPinnedParameter) {
564     replacements->push_back(Replacement(GOOGLE_BOOKMARK_BAR_PINNED, start));
565   } else if (parameter == kGoogleCurrentPageUrlParameter) {
566     replacements->push_back(Replacement(GOOGLE_CURRENT_PAGE_URL, start));
567   } else if (parameter == kGoogleCursorPositionParameter) {
568     replacements->push_back(Replacement(GOOGLE_CURSOR_POSITION, start));
569   } else if (parameter == kGoogleForceInstantResultsParameter) {
570     replacements->push_back(Replacement(GOOGLE_FORCE_INSTANT_RESULTS, start));
571   } else if (parameter == kGoogleImageOriginalHeight) {
572     replacements->push_back(
573         Replacement(TemplateURLRef::GOOGLE_IMAGE_ORIGINAL_HEIGHT, start));
574   } else if (parameter == kGoogleImageOriginalWidth) {
575     replacements->push_back(
576         Replacement(TemplateURLRef::GOOGLE_IMAGE_ORIGINAL_WIDTH, start));
577   } else if (parameter == kGoogleImageSearchSource) {
578     replacements->push_back(
579         Replacement(TemplateURLRef::GOOGLE_IMAGE_SEARCH_SOURCE, start));
580   } else if (parameter == kGoogleImageThumbnailParameter) {
581     replacements->push_back(
582         Replacement(TemplateURLRef::GOOGLE_IMAGE_THUMBNAIL, start));
583   } else if (parameter == kGoogleImageURLParameter) {
584     replacements->push_back(Replacement(TemplateURLRef::GOOGLE_IMAGE_URL,
585                                         start));
586   } else if (parameter == kGoogleInputTypeParameter) {
587     replacements->push_back(Replacement(TemplateURLRef::GOOGLE_INPUT_TYPE,
588                                         start));
589   } else if (parameter == kGoogleInstantExtendedEnabledParameter) {
590     replacements->push_back(Replacement(GOOGLE_INSTANT_EXTENDED_ENABLED,
591                                         start));
592   } else if (parameter == kGoogleInstantExtendedEnabledKey) {
// Constant substitution: no Replacement is recorded for this parameter.
593     url->insert(start, google_util::kInstantExtendedAPIParam);
594   } else if (parameter == kGoogleNTPIsThemedParameter) {
595     replacements->push_back(Replacement(GOOGLE_NTP_IS_THEMED, start));
596   } else if (parameter == kGoogleOmniboxStartMarginParameter) {
597     replacements->push_back(Replacement(GOOGLE_OMNIBOX_START_MARGIN, start));
598   } else if (parameter == kGoogleContextualSearchVersion) {
599     replacements->push_back(
600         Replacement(GOOGLE_CONTEXTUAL_SEARCH_VERSION, start));
601   } else if (parameter == kGoogleContextualSearchContextData) {
602     replacements->push_back(
603         Replacement(GOOGLE_CONTEXTUAL_SEARCH_CONTEXT_DATA, start));
604   } else if (parameter == kGoogleOriginalQueryForSuggestionParameter) {
605     replacements->push_back(Replacement(GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
606                                         start));
607   } else if (parameter == kGooglePageClassificationParameter) {
608     replacements->push_back(Replacement(GOOGLE_PAGE_CLASSIFICATION, start));
609   } else if (parameter == kGoogleRLZParameter) {
610     replacements->push_back(Replacement(GOOGLE_RLZ, start));
611   } else if (parameter == kGoogleSearchClient) {
612     replacements->push_back(Replacement(GOOGLE_SEARCH_CLIENT, start));
613   } else if (parameter == kGoogleSearchFieldtrialParameter) {
614     replacements->push_back(Replacement(GOOGLE_SEARCH_FIELDTRIAL_GROUP, start));
615   } else if (parameter == kGoogleSearchVersion) {
// Inserted only when the answers-in-suggest field trial is enabled;
// otherwise the parameter is removed without a substitute.
616     if (OmniboxFieldTrial::EnableAnswersInSuggest())
617       url->insert(start, "gs_rn=42&");
618   } else if (parameter == kGoogleSessionToken) {
619     replacements->push_back(Replacement(GOOGLE_SESSION_TOKEN, start));
620   } else if (parameter == kGoogleSourceIdParameter) {
// The source id is fixed at compile time and differs on Android.
621 #if defined(OS_ANDROID)
622     url->insert(start, "sourceid=chrome-mobile&");
623 #else
624     url->insert(start, "sourceid=chrome&");
625 #endif
626   } else if (parameter == kGoogleSuggestAPIKeyParameter) {
// The API key is escaped as a query parameter value before insertion.
627     url->insert(start,
628                 net::EscapeQueryParamValue(google_apis::GetAPIKey(), false));
629   } else if (parameter == kGoogleSuggestClient) {
630     replacements->push_back(Replacement(GOOGLE_SUGGEST_CLIENT, start));
631   } else if (parameter == kGoogleSuggestRequestId) {
632     replacements->push_back(Replacement(GOOGLE_SUGGEST_REQUEST_ID, start));
633   } else if (parameter == kGoogleUnescapedSearchTermsParameter) {
634     replacements->push_back(Replacement(GOOGLE_UNESCAPED_SEARCH_TERMS, start));
635   } else if (parameter == kInputEncodingParameter) {
636     replacements->push_back(Replacement(ENCODING, start));
637   } else if (parameter == kLanguageParameter) {
638     replacements->push_back(Replacement(LANGUAGE, start));
639   } else if (parameter == kOutputEncodingParameter) {
640     if (!optional)
641       url->insert(start, kOutputEncodingType);
642   } else if ((parameter == kStartIndexParameter) ||
643              (parameter == kStartPageParameter)) {
644     // We don't support these.
645     if (!optional)
646       url->insert(start, "1");
647   } else if (!prepopulated_) {
648     // If it's a prepopulated URL, we know that it's safe to remove unknown
649     // parameters, so just ignore this and return true below. Otherwise it could
650     // be some garbage but can also be a javascript block. Put it back.
651     url->insert(start, full_parameter);
652     return false;
653   }
654   return true;
655 }
656 
ParseURL(const std::string & url,Replacements * replacements,PostParams * post_params,bool * valid) const657 std::string TemplateURLRef::ParseURL(const std::string& url,
658                                      Replacements* replacements,
659                                      PostParams* post_params,
660                                      bool* valid) const {
661   *valid = false;
662   std::string parsed_url = url;
663   for (size_t last = 0; last != std::string::npos; ) {
664     last = parsed_url.find(kStartParameter, last);
665     if (last != std::string::npos) {
666       size_t template_end = parsed_url.find(kEndParameter, last);
667       if (template_end != std::string::npos) {
668         // Since we allow Javascript in the URL, {} pairs could be nested. Match
669         // only leaf pairs with supported parameters.
670         size_t next_template_start = parsed_url.find(kStartParameter, last + 1);
671         if (next_template_start == std::string::npos ||
672             next_template_start > template_end) {
673           // If successful, ParseParameter erases from the string as such no
674           // need to update |last|. If failed, move |last| to the end of pair.
675           if (!ParseParameter(last, template_end, &parsed_url, replacements)) {
676             // |template_end| + 1 may be beyond the end of the string.
677             last = template_end;
678           }
679         } else {
680           last = next_template_start;
681         }
682       } else {
683         // Open brace without a closing brace, return.
684         return std::string();
685       }
686     }
687   }
688 
689   // Handles the post parameters.
690   const std::string& post_params_string = GetPostParamsString();
691   if (!post_params_string.empty()) {
692     typedef std::vector<std::string> Strings;
693     Strings param_list;
694     base::SplitString(post_params_string, ',', &param_list);
695 
696     for (Strings::const_iterator iterator = param_list.begin();
697          iterator != param_list.end(); ++iterator) {
698       Strings parts;
699       // The '=' delimiter is required and the name must be not empty.
700       base::SplitString(*iterator, '=', &parts);
701       if ((parts.size() != 2U) || parts[0].empty())
702         return std::string();
703 
704       std::string& value = parts[1];
705       size_t replacements_size = replacements->size();
706       if (IsTemplateParameterString(value))
707         ParseParameter(0, value.length() - 1, &value, replacements);
708       post_params->push_back(std::make_pair(parts[0], value));
709       // If there was a replacement added, points its index to last added
710       // PostParam.
711       if (replacements->size() > replacements_size) {
712         DCHECK_EQ(replacements_size + 1, replacements->size());
713         Replacement* r = &replacements->back();
714         r->is_post_param = true;
715         r->index = post_params->size() - 1;
716       }
717     }
718     DCHECK(!post_params->empty());
719   }
720 
721   *valid = true;
722   return parsed_url;
723 }
724 
ParseIfNecessary(const SearchTermsData & search_terms_data) const725 void TemplateURLRef::ParseIfNecessary(
726     const SearchTermsData& search_terms_data) const {
727   if (!parsed_) {
728     InvalidateCachedValues();
729     parsed_ = true;
730     parsed_url_ = ParseURL(GetURL(), &replacements_, &post_params_, &valid_);
731     supports_replacements_ = false;
732     if (valid_) {
733       bool has_only_one_search_term = false;
734       for (Replacements::const_iterator i = replacements_.begin();
735            i != replacements_.end(); ++i) {
736         if ((i->type == SEARCH_TERMS) ||
737             (i->type == GOOGLE_UNESCAPED_SEARCH_TERMS)) {
738           if (has_only_one_search_term) {
739             has_only_one_search_term = false;
740             break;
741           }
742           has_only_one_search_term = true;
743           supports_replacements_ = true;
744         }
745       }
746       // Only parse the host/key if there is one search term. Technically there
747       // could be more than one term, but it's uncommon; so we punt.
748       if (has_only_one_search_term)
749         ParseHostAndSearchTermKey(search_terms_data);
750     }
751   }
752 }
753 
ParseHostAndSearchTermKey(const SearchTermsData & search_terms_data) const754 void TemplateURLRef::ParseHostAndSearchTermKey(
755     const SearchTermsData& search_terms_data) const {
756   std::string url_string(GetURL());
757   ReplaceSubstringsAfterOffset(&url_string, 0,
758                                kGoogleBaseURLParameterFull,
759                                search_terms_data.GoogleBaseURLValue());
760   ReplaceSubstringsAfterOffset(&url_string, 0,
761                                kGoogleBaseSuggestURLParameterFull,
762                                search_terms_data.GoogleBaseSuggestURLValue());
763 
764   search_term_key_.clear();
765   host_.clear();
766   path_.clear();
767   search_term_key_location_ = url::Parsed::REF;
768 
769   GURL url(url_string);
770   if (!url.is_valid())
771     return;
772 
773   std::string query_key = FindSearchTermsKey(url.query());
774   std::string ref_key = FindSearchTermsKey(url.ref());
775   if (query_key.empty() == ref_key.empty())
776     return;  // No key or multiple keys found.  We only handle having one key.
777   search_term_key_ = query_key.empty() ? ref_key : query_key;
778   search_term_key_location_ =
779       query_key.empty() ? url::Parsed::REF : url::Parsed::QUERY;
780   host_ = url.host();
781   path_ = url.path();
782 }
783 
HandleReplacement(const std::string & name,const std::string & value,const Replacement & replacement,std::string * url) const784 void TemplateURLRef::HandleReplacement(const std::string& name,
785                                        const std::string& value,
786                                        const Replacement& replacement,
787                                        std::string* url) const {
788   size_t pos = replacement.index;
789   if (replacement.is_post_param) {
790     DCHECK_LT(pos, post_params_.size());
791     DCHECK(!post_params_[pos].first.empty());
792     post_params_[pos].second = value;
793   } else {
794     url->insert(pos, name.empty() ? value : (name + "=" + value + "&"));
795   }
796 }
797 
HandleReplacements(const SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data,PostContent * post_content) const798 std::string TemplateURLRef::HandleReplacements(
799     const SearchTermsArgs& search_terms_args,
800     const SearchTermsData& search_terms_data,
801     PostContent* post_content) const {
802   if (replacements_.empty()) {
803     if (!post_params_.empty())
804       EncodeFormData(post_params_, post_content);
805     return parsed_url_;
806   }
807 
808   // Determine if the search terms are in the query or before. We're escaping
809   // space as '+' in the former case and as '%20' in the latter case.
810   bool is_in_query = true;
811   for (Replacements::iterator i = replacements_.begin();
812        i != replacements_.end(); ++i) {
813     if (i->type == SEARCH_TERMS) {
814       base::string16::size_type query_start = parsed_url_.find('?');
815       is_in_query = query_start != base::string16::npos &&
816           (static_cast<base::string16::size_type>(i->index) > query_start);
817       break;
818     }
819   }
820 
821   std::string input_encoding;
822   base::string16 encoded_terms;
823   base::string16 encoded_original_query;
824   owner_->EncodeSearchTerms(search_terms_args, is_in_query, &input_encoding,
825                             &encoded_terms, &encoded_original_query);
826 
827   std::string url = parsed_url_;
828 
829   // replacements_ is ordered in ascending order, as such we need to iterate
830   // from the back.
831   for (Replacements::reverse_iterator i = replacements_.rbegin();
832        i != replacements_.rend(); ++i) {
833     switch (i->type) {
834       case ENCODING:
835         HandleReplacement(std::string(), input_encoding, *i, &url);
836         break;
837 
838       case GOOGLE_ASSISTED_QUERY_STATS:
839         DCHECK(!i->is_post_param);
840         if (!search_terms_args.assisted_query_stats.empty()) {
841           // Get the base URL without substituting AQS to avoid infinite
842           // recursion.  We need the URL to find out if it meets all
843           // AQS requirements (e.g. HTTPS protocol check).
844           // See TemplateURLRef::SearchTermsArgs for more details.
845           SearchTermsArgs search_terms_args_without_aqs(search_terms_args);
846           search_terms_args_without_aqs.assisted_query_stats.clear();
847           GURL base_url(ReplaceSearchTerms(
848               search_terms_args_without_aqs, search_terms_data, NULL));
849           if (base_url.SchemeIs(url::kHttpsScheme)) {
850             HandleReplacement(
851                 "aqs", search_terms_args.assisted_query_stats, *i, &url);
852           }
853         }
854         break;
855 
856       case GOOGLE_BASE_URL:
857         DCHECK(!i->is_post_param);
858         HandleReplacement(
859             std::string(), search_terms_data.GoogleBaseURLValue(), *i, &url);
860         break;
861 
862       case GOOGLE_BASE_SUGGEST_URL:
863         DCHECK(!i->is_post_param);
864         HandleReplacement(
865             std::string(), search_terms_data.GoogleBaseSuggestURLValue(), *i,
866             &url);
867         break;
868 
869       case GOOGLE_BOOKMARK_BAR_PINNED:
870         if (showing_search_terms_) {
871           // Log whether the bookmark bar is pinned when the user is seeing
872           // InstantExtended on the SRP.
873           DCHECK(!i->is_post_param);
874           HandleReplacement(
875               "bmbp", search_terms_args.bookmark_bar_pinned ? "1" : "0", *i,
876               &url);
877         }
878         break;
879 
880       case GOOGLE_CURRENT_PAGE_URL:
881         DCHECK(!i->is_post_param);
882         if (!search_terms_args.current_page_url.empty()) {
883           const std::string& escaped_current_page_url =
884               net::EscapeQueryParamValue(search_terms_args.current_page_url,
885                                          true);
886           HandleReplacement("url", escaped_current_page_url, *i, &url);
887         }
888         break;
889 
890       case GOOGLE_CURSOR_POSITION:
891         DCHECK(!i->is_post_param);
892         if (search_terms_args.cursor_position != base::string16::npos)
893           HandleReplacement(
894               "cp",
895               base::StringPrintf("%" PRIuS, search_terms_args.cursor_position),
896               *i,
897               &url);
898         break;
899 
900       case GOOGLE_FORCE_INSTANT_RESULTS:
901         DCHECK(!i->is_post_param);
902         HandleReplacement(std::string(),
903                           chrome::ForceInstantResultsParam(
904                               search_terms_args.force_instant_results),
905                           *i,
906                           &url);
907         break;
908 
909       case GOOGLE_INPUT_TYPE:
910         DCHECK(!i->is_post_param);
911         HandleReplacement(
912             "oit", base::IntToString(search_terms_args.input_type), *i, &url);
913         break;
914 
915       case GOOGLE_INSTANT_EXTENDED_ENABLED:
916         DCHECK(!i->is_post_param);
917         HandleReplacement(std::string(),
918                           chrome::InstantExtendedEnabledParam(type_ == SEARCH),
919                           *i,
920                           &url);
921         break;
922 
923       case GOOGLE_NTP_IS_THEMED:
924         DCHECK(!i->is_post_param);
925         HandleReplacement(
926             std::string(), search_terms_data.NTPIsThemedParam(), *i, &url);
927         break;
928 
929       case GOOGLE_OMNIBOX_START_MARGIN:
930         DCHECK(!i->is_post_param);
931         if (search_terms_args.omnibox_start_margin >= 0) {
932           HandleReplacement(
933               "es_sm",
934               base::IntToString(search_terms_args.omnibox_start_margin),
935               *i,
936               &url);
937         }
938         break;
939 
940       case GOOGLE_CONTEXTUAL_SEARCH_VERSION:
941         if (search_terms_args.contextual_search_params.version >= 0) {
942           HandleReplacement(
943               "ctxs",
944               base::IntToString(
945                   search_terms_args.contextual_search_params.version),
946               *i,
947               &url);
948         }
949         break;
950 
951       case GOOGLE_CONTEXTUAL_SEARCH_CONTEXT_DATA: {
952         DCHECK(!i->is_post_param);
953         std::string context_data;
954 
955         const SearchTermsArgs::ContextualSearchParams& params =
956             search_terms_args.contextual_search_params;
957 
958         if (params.start != std::string::npos) {
959           context_data.append("ctxs_start=" + base::IntToString(
960               params.start) + "&");
961         }
962 
963         if (params.end != std::string::npos) {
964           context_data.append("ctxs_end=" + base::IntToString(
965               params.end) + "&");
966         }
967 
968         if (!params.selection.empty())
969           context_data.append("q=" + params.selection + "&");
970 
971         if (!params.content.empty())
972           context_data.append("ctxs_content=" + params.content + "&");
973 
974         if (!params.base_page_url.empty())
975           context_data.append("ctxs_url=" + params.base_page_url + "&");
976 
977         if (!params.encoding.empty()) {
978           context_data.append("ctxs_encoding=" + params.encoding + "&");
979         }
980 
981         HandleReplacement(std::string(), context_data, *i, &url);
982         break;
983       }
984 
985       case GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION:
986         DCHECK(!i->is_post_param);
987         if (search_terms_args.accepted_suggestion >= 0 ||
988             !search_terms_args.assisted_query_stats.empty()) {
989           HandleReplacement(
990               "oq", base::UTF16ToUTF8(encoded_original_query), *i, &url);
991         }
992         break;
993 
994       case GOOGLE_PAGE_CLASSIFICATION:
995         if (search_terms_args.page_classification !=
996             metrics::OmniboxEventProto::INVALID_SPEC) {
997           HandleReplacement(
998               "pgcl", base::IntToString(search_terms_args.page_classification),
999               *i, &url);
1000         }
1001         break;
1002 
1003       case GOOGLE_RLZ: {
1004         DCHECK(!i->is_post_param);
1005         // On platforms that don't have RLZ, we still want this branch
1006         // to happen so that we replace the RLZ template with the
1007         // empty string.  (If we don't handle this case, we hit a
1008         // NOTREACHED below.)
1009         base::string16 rlz_string = search_terms_data.GetRlzParameterValue(
1010             search_terms_args.from_app_list);
1011         if (!rlz_string.empty()) {
1012           HandleReplacement("rlz", base::UTF16ToUTF8(rlz_string), *i, &url);
1013         }
1014         break;
1015       }
1016 
1017       case GOOGLE_SEARCH_CLIENT: {
1018         DCHECK(!i->is_post_param);
1019         std::string client = search_terms_data.GetSearchClient();
1020         if (!client.empty())
1021           HandleReplacement("client", client, *i, &url);
1022         break;
1023       }
1024 
1025       case GOOGLE_SEARCH_FIELDTRIAL_GROUP:
1026         // We are not currently running any fieldtrials that modulate the search
1027         // url.  If we do, then we'd have some conditional insert such as:
1028         // url.insert(i->index, used_www ? "gcx=w&" : "gcx=c&");
1029         break;
1030 
1031       case GOOGLE_SESSION_TOKEN: {
1032         std::string token = search_terms_args.session_token;
1033         if (!token.empty())
1034           HandleReplacement("psi", token, *i, &url);
1035         break;
1036       }
1037 
1038       case GOOGLE_SUGGEST_CLIENT:
1039         HandleReplacement(
1040             std::string(), search_terms_data.GetSuggestClient(), *i, &url);
1041         break;
1042 
1043       case GOOGLE_SUGGEST_REQUEST_ID:
1044         HandleReplacement(
1045             std::string(), search_terms_data.GetSuggestRequestIdentifier(), *i,
1046             &url);
1047         break;
1048 
1049       case GOOGLE_UNESCAPED_SEARCH_TERMS: {
1050         std::string unescaped_terms;
1051         base::UTF16ToCodepage(search_terms_args.search_terms,
1052                               input_encoding.c_str(),
1053                               base::OnStringConversionError::SKIP,
1054                               &unescaped_terms);
1055         HandleReplacement(std::string(), unescaped_terms, *i, &url);
1056         break;
1057       }
1058 
1059       case LANGUAGE:
1060         HandleReplacement(
1061             std::string(), search_terms_data.GetApplicationLocale(), *i, &url);
1062         break;
1063 
1064       case SEARCH_TERMS:
1065         HandleReplacement(
1066             std::string(), base::UTF16ToUTF8(encoded_terms), *i, &url);
1067         break;
1068 
1069       case GOOGLE_IMAGE_THUMBNAIL:
1070         HandleReplacement(
1071             std::string(), search_terms_args.image_thumbnail_content, *i, &url);
1072         break;
1073 
1074       case GOOGLE_IMAGE_URL:
1075         if (search_terms_args.image_url.is_valid()) {
1076           HandleReplacement(
1077               std::string(), search_terms_args.image_url.spec(), *i, &url);
1078         }
1079         break;
1080 
1081       case GOOGLE_IMAGE_ORIGINAL_WIDTH:
1082         if (!search_terms_args.image_original_size.IsEmpty()) {
1083           HandleReplacement(
1084               std::string(),
1085               base::IntToString(search_terms_args.image_original_size.width()),
1086               *i, &url);
1087         }
1088         break;
1089 
1090       case GOOGLE_IMAGE_ORIGINAL_HEIGHT:
1091         if (!search_terms_args.image_original_size.IsEmpty()) {
1092           HandleReplacement(
1093               std::string(),
1094               base::IntToString(search_terms_args.image_original_size.height()),
1095               *i, &url);
1096         }
1097         break;
1098 
1099       case GOOGLE_IMAGE_SEARCH_SOURCE:
1100         HandleReplacement(
1101             std::string(), search_terms_data.GoogleImageSearchSource(), *i,
1102             &url);
1103         break;
1104 
1105       default:
1106         NOTREACHED();
1107         break;
1108     }
1109   }
1110 
1111   if (!post_params_.empty())
1112     EncodeFormData(post_params_, post_content);
1113 
1114   return url;
1115 }
1116 
1117 
1118 // TemplateURL ----------------------------------------------------------------
1119 
TemplateURL(const TemplateURLData & data)1120 TemplateURL::TemplateURL(const TemplateURLData& data)
1121     : data_(data),
1122       url_ref_(this, TemplateURLRef::SEARCH),
1123       suggestions_url_ref_(this,
1124                            TemplateURLRef::SUGGEST),
1125       instant_url_ref_(this,
1126                        TemplateURLRef::INSTANT),
1127       image_url_ref_(this, TemplateURLRef::IMAGE),
1128       new_tab_url_ref_(this, TemplateURLRef::NEW_TAB),
1129       contextual_search_url_ref_(this, TemplateURLRef::CONTEXTUAL_SEARCH) {
1130   SetPrepopulateId(data_.prepopulate_id);
1131 
1132   if (data_.search_terms_replacement_key ==
1133       kGoogleInstantExtendedEnabledKeyFull) {
1134     data_.search_terms_replacement_key = google_util::kInstantExtendedAPIParam;
1135   }
1136 }
1137 
~TemplateURL()1138 TemplateURL::~TemplateURL() {
1139 }
1140 
1141 // static
GenerateKeyword(const GURL & url)1142 base::string16 TemplateURL::GenerateKeyword(const GURL& url) {
1143   DCHECK(url.is_valid());
1144   // Strip "www." off the front of the keyword; otherwise the keyword won't work
1145   // properly.  See http://code.google.com/p/chromium/issues/detail?id=6984 .
1146   // Special case: if the host was exactly "www." (not sure this can happen but
1147   // perhaps with some weird intranet and custom DNS server?), ensure we at
1148   // least don't return the empty string.
1149   base::string16 keyword(net::StripWWWFromHost(url));
1150   return keyword.empty() ? base::ASCIIToUTF16("www") : keyword;
1151 }
1152 
1153 // static
GenerateFaviconURL(const GURL & url)1154 GURL TemplateURL::GenerateFaviconURL(const GURL& url) {
1155   DCHECK(url.is_valid());
1156   GURL::Replacements rep;
1157 
1158   const char favicon_path[] = "/favicon.ico";
1159   int favicon_path_len = arraysize(favicon_path) - 1;
1160 
1161   rep.SetPath(favicon_path, url::Component(0, favicon_path_len));
1162   rep.ClearUsername();
1163   rep.ClearPassword();
1164   rep.ClearQuery();
1165   rep.ClearRef();
1166   return url.ReplaceComponents(rep);
1167 }
1168 
1169 // static
MatchesData(const TemplateURL * t_url,const TemplateURLData * data,const SearchTermsData & search_terms_data)1170 bool TemplateURL::MatchesData(const TemplateURL* t_url,
1171                               const TemplateURLData* data,
1172                               const SearchTermsData& search_terms_data) {
1173   if (!t_url || !data)
1174     return !t_url && !data;
1175 
1176   return (t_url->short_name() == data->short_name) &&
1177       t_url->HasSameKeywordAs(*data, search_terms_data) &&
1178       (t_url->url() == data->url()) &&
1179       (t_url->suggestions_url() == data->suggestions_url) &&
1180       (t_url->instant_url() == data->instant_url) &&
1181       (t_url->image_url() == data->image_url) &&
1182       (t_url->new_tab_url() == data->new_tab_url) &&
1183       (t_url->search_url_post_params() == data->search_url_post_params) &&
1184       (t_url->suggestions_url_post_params() ==
1185           data->suggestions_url_post_params) &&
1186       (t_url->instant_url_post_params() == data->instant_url_post_params) &&
1187       (t_url->image_url_post_params() == data->image_url_post_params) &&
1188       (t_url->favicon_url() == data->favicon_url) &&
1189       (t_url->safe_for_autoreplace() == data->safe_for_autoreplace) &&
1190       (t_url->show_in_default_list() == data->show_in_default_list) &&
1191       (t_url->input_encodings() == data->input_encodings) &&
1192       (t_url->alternate_urls() == data->alternate_urls) &&
1193       (t_url->search_terms_replacement_key() ==
1194           data->search_terms_replacement_key);
1195 }
1196 
AdjustedShortNameForLocaleDirection() const1197 base::string16 TemplateURL::AdjustedShortNameForLocaleDirection() const {
1198   base::string16 bidi_safe_short_name = data_.short_name;
1199   base::i18n::AdjustStringForLocaleDirection(&bidi_safe_short_name);
1200   return bidi_safe_short_name;
1201 }
1202 
ShowInDefaultList(const SearchTermsData & search_terms_data) const1203 bool TemplateURL::ShowInDefaultList(
1204     const SearchTermsData& search_terms_data) const {
1205   return data_.show_in_default_list &&
1206       url_ref_.SupportsReplacement(search_terms_data);
1207 }
1208 
SupportsReplacement(const SearchTermsData & search_terms_data) const1209 bool TemplateURL::SupportsReplacement(
1210     const SearchTermsData& search_terms_data) const {
1211   return url_ref_.SupportsReplacement(search_terms_data);
1212 }
1213 
HasGoogleBaseURLs(const SearchTermsData & search_terms_data) const1214 bool TemplateURL::HasGoogleBaseURLs(
1215     const SearchTermsData& search_terms_data) const {
1216   return url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1217       suggestions_url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1218       instant_url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1219       image_url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1220       new_tab_url_ref_.HasGoogleBaseURLs(search_terms_data);
1221 }
1222 
IsGoogleSearchURLWithReplaceableKeyword(const SearchTermsData & search_terms_data) const1223 bool TemplateURL::IsGoogleSearchURLWithReplaceableKeyword(
1224     const SearchTermsData& search_terms_data) const {
1225   return (GetType() == NORMAL) &&
1226       url_ref_.HasGoogleBaseURLs(search_terms_data) &&
1227       google_util::IsGoogleHostname(base::UTF16ToUTF8(data_.keyword()),
1228                                     google_util::DISALLOW_SUBDOMAIN);
1229 }
1230 
HasSameKeywordAs(const TemplateURLData & other,const SearchTermsData & search_terms_data) const1231 bool TemplateURL::HasSameKeywordAs(
1232     const TemplateURLData& other,
1233     const SearchTermsData& search_terms_data) const {
1234   return (data_.keyword() == other.keyword()) ||
1235       (IsGoogleSearchURLWithReplaceableKeyword(search_terms_data) &&
1236        TemplateURL(other).IsGoogleSearchURLWithReplaceableKeyword(
1237            search_terms_data));
1238 }
1239 
GetType() const1240 TemplateURL::Type TemplateURL::GetType() const {
1241   if (extension_info_)
1242     return NORMAL_CONTROLLED_BY_EXTENSION;
1243   return GURL(data_.url()).SchemeIs(extensions::kExtensionScheme) ?
1244       OMNIBOX_API_EXTENSION : NORMAL;
1245 }
1246 
GetExtensionId() const1247 std::string TemplateURL::GetExtensionId() const {
1248   DCHECK_NE(NORMAL, GetType());
1249   return extension_info_ ?
1250       extension_info_->extension_id : GURL(data_.url()).host();
1251 }
1252 
URLCount() const1253 size_t TemplateURL::URLCount() const {
1254   // Add 1 for the regular search URL.
1255   return data_.alternate_urls.size() + 1;
1256 }
1257 
GetURL(size_t index) const1258 const std::string& TemplateURL::GetURL(size_t index) const {
1259   DCHECK_LT(index, URLCount());
1260 
1261   return (index < data_.alternate_urls.size()) ?
1262       data_.alternate_urls[index] : url();
1263 }
1264 
ExtractSearchTermsFromURL(const GURL & url,const SearchTermsData & search_terms_data,base::string16 * search_terms)1265 bool TemplateURL::ExtractSearchTermsFromURL(
1266     const GURL& url,
1267     const SearchTermsData& search_terms_data,
1268     base::string16* search_terms) {
1269   return FindSearchTermsInURL(url, search_terms_data, search_terms, NULL, NULL);
1270 }
1271 
IsSearchURL(const GURL & url,const SearchTermsData & search_terms_data)1272 bool TemplateURL::IsSearchURL(
1273     const GURL& url,
1274     const SearchTermsData& search_terms_data) {
1275   base::string16 search_terms;
1276   return ExtractSearchTermsFromURL(url, search_terms_data, &search_terms) &&
1277       !search_terms.empty();
1278 }
1279 
HasSearchTermsReplacementKey(const GURL & url) const1280 bool TemplateURL::HasSearchTermsReplacementKey(const GURL& url) const {
1281   // Look for the key both in the query and the ref.
1282   std::string params[] = {url.query(), url.ref()};
1283 
1284   for (int i = 0; i < 2; ++i) {
1285     url::Component query, key, value;
1286     query.len = static_cast<int>(params[i].size());
1287     while (url::ExtractQueryKeyValue(params[i].c_str(), &query, &key, &value)) {
1288       if (key.is_nonempty() &&
1289           params[i].substr(key.begin, key.len) ==
1290               search_terms_replacement_key()) {
1291         return true;
1292       }
1293     }
1294   }
1295   return false;
1296 }
1297 
ReplaceSearchTermsInURL(const GURL & url,const TemplateURLRef::SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data,GURL * result)1298 bool TemplateURL::ReplaceSearchTermsInURL(
1299     const GURL& url,
1300     const TemplateURLRef::SearchTermsArgs& search_terms_args,
1301     const SearchTermsData& search_terms_data,
1302     GURL* result) {
1303   // TODO(beaudoin): Use AQS from |search_terms_args| too.
1304   url::Parsed::ComponentType search_term_component;
1305   url::Component search_terms_position;
1306   base::string16 search_terms;
1307   if (!FindSearchTermsInURL(url, search_terms_data, &search_terms,
1308                             &search_term_component, &search_terms_position)) {
1309     return false;
1310   }
1311   DCHECK(search_terms_position.is_nonempty());
1312 
1313   // FindSearchTermsInURL only returns true for search terms in the query or
1314   // ref, so we can call EncodeSearchTerm with |is_in_query| = true, since query
1315   // and ref are encoded in the same way.
1316   std::string input_encoding;
1317   base::string16 encoded_terms;
1318   base::string16 encoded_original_query;
1319   EncodeSearchTerms(search_terms_args, true, &input_encoding,
1320                     &encoded_terms, &encoded_original_query);
1321 
1322   std::string old_params(
1323       (search_term_component == url::Parsed::REF) ? url.ref() : url.query());
1324   std::string new_params(old_params, 0, search_terms_position.begin);
1325   new_params += base::UTF16ToUTF8(search_terms_args.search_terms);
1326   new_params += old_params.substr(search_terms_position.end());
1327   url::StdStringReplacements<std::string> replacements;
1328   if (search_term_component == url::Parsed::REF)
1329     replacements.SetRefStr(new_params);
1330   else
1331     replacements.SetQueryStr(new_params);
1332   *result = url.ReplaceComponents(replacements);
1333   return true;
1334 }
1335 
EncodeSearchTerms(const TemplateURLRef::SearchTermsArgs & search_terms_args,bool is_in_query,std::string * input_encoding,base::string16 * encoded_terms,base::string16 * encoded_original_query) const1336 void TemplateURL::EncodeSearchTerms(
1337     const TemplateURLRef::SearchTermsArgs& search_terms_args,
1338     bool is_in_query,
1339     std::string* input_encoding,
1340     base::string16* encoded_terms,
1341     base::string16* encoded_original_query) const {
1342 
1343   std::vector<std::string> encodings(input_encodings());
1344   if (std::find(encodings.begin(), encodings.end(), "UTF-8") == encodings.end())
1345     encodings.push_back("UTF-8");
1346   for (std::vector<std::string>::const_iterator i(encodings.begin());
1347        i != encodings.end(); ++i) {
1348     if (TryEncoding(search_terms_args.search_terms,
1349                     search_terms_args.original_query, i->c_str(),
1350                     is_in_query, encoded_terms, encoded_original_query)) {
1351       *input_encoding = *i;
1352       return;
1353     }
1354   }
1355   NOTREACHED();
1356 }
1357 
GenerateSearchURL(const SearchTermsData & search_terms_data) const1358 GURL TemplateURL::GenerateSearchURL(
1359     const SearchTermsData& search_terms_data) const {
1360   if (!url_ref_.IsValid(search_terms_data))
1361     return GURL();
1362 
1363   if (!url_ref_.SupportsReplacement(search_terms_data))
1364     return GURL(url());
1365 
1366   // Use something obscure for the search terms argument so that in the rare
1367   // case the term replaces the URL it's unlikely another keyword would have the
1368   // same url.
1369   // TODO(jnd): Add additional parameters to get post data when the search URL
1370   // has post parameters.
1371   return GURL(url_ref_.ReplaceSearchTerms(
1372       TemplateURLRef::SearchTermsArgs(
1373           base::ASCIIToUTF16("blah.blah.blah.blah.blah")),
1374       search_terms_data, NULL));
1375 }
1376 
CopyFrom(const TemplateURL & other)1377 void TemplateURL::CopyFrom(const TemplateURL& other) {
1378   if (this == &other)
1379     return;
1380 
1381   data_ = other.data_;
1382   url_ref_.InvalidateCachedValues();
1383   suggestions_url_ref_.InvalidateCachedValues();
1384   instant_url_ref_.InvalidateCachedValues();
1385   SetPrepopulateId(other.data_.prepopulate_id);
1386 }
1387 
SetURL(const std::string & url)1388 void TemplateURL::SetURL(const std::string& url) {
1389   data_.SetURL(url);
1390   url_ref_.InvalidateCachedValues();
1391 }
1392 
SetPrepopulateId(int id)1393 void TemplateURL::SetPrepopulateId(int id) {
1394   data_.prepopulate_id = id;
1395   const bool prepopulated = id > 0;
1396   url_ref_.prepopulated_ = prepopulated;
1397   suggestions_url_ref_.prepopulated_ = prepopulated;
1398   instant_url_ref_.prepopulated_ = prepopulated;
1399 }
1400 
ResetKeywordIfNecessary(const SearchTermsData & search_terms_data,bool force)1401 void TemplateURL::ResetKeywordIfNecessary(
1402     const SearchTermsData& search_terms_data,
1403     bool force) {
1404   if (IsGoogleSearchURLWithReplaceableKeyword(search_terms_data) || force) {
1405     DCHECK(GetType() != OMNIBOX_API_EXTENSION);
1406     GURL url(GenerateSearchURL(search_terms_data));
1407     if (url.is_valid())
1408       data_.SetKeyword(GenerateKeyword(url));
1409   }
1410 }
1411 
FindSearchTermsInURL(const GURL & url,const SearchTermsData & search_terms_data,base::string16 * search_terms,url::Parsed::ComponentType * search_term_component,url::Component * search_terms_position)1412 bool TemplateURL::FindSearchTermsInURL(
1413     const GURL& url,
1414     const SearchTermsData& search_terms_data,
1415     base::string16* search_terms,
1416     url::Parsed::ComponentType* search_term_component,
1417     url::Component* search_terms_position) {
1418   DCHECK(search_terms);
1419   search_terms->clear();
1420 
1421   // Try to match with every pattern.
1422   for (size_t i = 0; i < URLCount(); ++i) {
1423     TemplateURLRef ref(this, i);
1424     if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data,
1425         search_term_component, search_terms_position)) {
1426       // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty
1427       // it means the pattern matched but no search terms were present. In this
1428       // case we fail immediately without looking for matches in subsequent
1429       // patterns. This means that given patterns
1430       //    [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1431       // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would
1432       // return false. This is important for at least Google, where such URLs
1433       // are invalid.
1434       return !search_terms->empty();
1435     }
1436   }
1437   return false;
1438 }
1439