• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/search_engines/template_url.h"
6 
7 #include <string>
8 #include <vector>
9 
10 #include "base/basictypes.h"
11 #include "base/command_line.h"
12 #include "base/format_macros.h"
13 #include "base/i18n/icu_string_conversions.h"
14 #include "base/i18n/rtl.h"
15 #include "base/logging.h"
16 #include "base/metrics/field_trial.h"
17 #include "base/rand_util.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/string_split.h"
20 #include "base/strings/string_util.h"
21 #include "base/strings/stringprintf.h"
22 #include "base/strings/utf_string_conversions.h"
23 #include "components/google/core/browser/google_util.h"
24 #include "components/metrics/proto/omnibox_input_type.pb.h"
25 #include "components/search_engines/search_engines_switches.h"
26 #include "components/search_engines/search_terms_data.h"
27 #include "google_apis/google_api_keys.h"
28 #include "net/base/escape.h"
29 #include "net/base/mime_util.h"
30 #include "net/base/net_util.h"
31 
32 namespace {
33 
// The TemplateURLRef has any number of terms that need to be replaced. Each of
// the terms is enclosed in braces. If the character preceding the final
// brace is a ?, it indicates the term is optional and can be replaced with
// an empty string.
const char kStartParameter = '{';
const char kEndParameter = '}';
const char kOptional = '?';

// Known parameters found in the URL. Names are stored without the enclosing
// braces; the "...Full" variants include the braces, i.e. the exact text as it
// appears inside a template URL.
const char kSearchTermsParameter[] = "searchTerms";
const char kSearchTermsParameterFull[] = "{searchTerms}";
const char kCountParameter[] = "count";
const char kStartIndexParameter[] = "startIndex";
const char kStartPageParameter[] = "startPage";
const char kLanguageParameter[] = "language";
const char kInputEncodingParameter[] = "inputEncoding";
const char kOutputEncodingParameter[] = "outputEncoding";

const char kGoogleAssistedQueryStatsParameter[] = "google:assistedQueryStats";

// Host/Domain Google searches are relative to.
const char kGoogleBaseURLParameter[] = "google:baseURL";
const char kGoogleBaseURLParameterFull[] = "{google:baseURL}";

// Like google:baseURL, but for the Search Suggest capability.
const char kGoogleBaseSuggestURLParameter[] = "google:baseSuggestURL";
const char kGoogleBaseSuggestURLParameterFull[] = "{google:baseSuggestURL}";
const char kGoogleBookmarkBarPinnedParameter[] = "google:bookmarkBarPinned";
const char kGoogleContextualSearchContextData[] =
    "google:contextualSearchContextData";
const char kGoogleContextualSearchVersion[] = "google:contextualSearchVersion";
const char kGoogleCurrentPageUrlParameter[] = "google:currentPageUrl";
const char kGoogleCursorPositionParameter[] = "google:cursorPosition";
const char kGoogleForceInstantResultsParameter[] = "google:forceInstantResults";
const char kGoogleImageSearchSource[] = "google:imageSearchSource";
const char kGoogleImageThumbnailParameter[] = "google:imageThumbnail";
const char kGoogleImageOriginalWidth[] = "google:imageOriginalWidth";
const char kGoogleImageOriginalHeight[] = "google:imageOriginalHeight";
const char kGoogleImageURLParameter[] = "google:imageURL";
const char kGoogleInputTypeParameter[] = "google:inputType";
const char kGoogleInstantExtendedEnabledParameter[] =
    "google:instantExtendedEnabledParameter";
const char kGoogleInstantExtendedEnabledKey[] =
    "google:instantExtendedEnabledKey";
const char kGoogleInstantExtendedEnabledKeyFull[] =
    "{google:instantExtendedEnabledKey}";
const char kGoogleNTPIsThemedParameter[] = "google:ntpIsThemedParameter";
const char kGoogleOmniboxStartMarginParameter[] =
    "google:omniboxStartMarginParameter";
const char kGoogleOriginalQueryForSuggestionParameter[] =
    "google:originalQueryForSuggestion";
const char kGooglePageClassificationParameter[] = "google:pageClassification";
const char kGooglePrefetchQuery[] = "google:prefetchQuery";
const char kGoogleRLZParameter[] = "google:RLZ";
const char kGoogleSearchClient[] = "google:searchClient";
const char kGoogleSearchFieldtrialParameter[] =
    "google:searchFieldtrialParameter";
const char kGoogleSearchVersion[] = "google:searchVersion";
const char kGoogleSessionToken[] = "google:sessionToken";
const char kGoogleSourceIdParameter[] = "google:sourceId";
const char kGoogleSuggestAPIKeyParameter[] = "google:suggestAPIKeyParameter";
const char kGoogleSuggestClient[] = "google:suggestClient";
const char kGoogleSuggestRequestId[] = "google:suggestRid";

// Same as kSearchTermsParameter, with no escaping.
const char kGoogleUnescapedSearchTermsParameter[] =
    "google:unescapedSearchTerms";
const char kGoogleUnescapedSearchTermsParameterFull[] =
    "{google:unescapedSearchTerms}";

// Display value for kSearchTermsParameter.
const char kDisplaySearchTerms[] = "%s";

// Display value for kGoogleUnescapedSearchTermsParameter.
const char kDisplayUnescapedSearchTerms[] = "%S";

// Used if the count parameter is not optional. Indicates we want 10 search
// results.
const char kDefaultCount[] = "10";

// Used if the parameter kOutputEncodingParameter is required.
const char kOutputEncodingType[] = "UTF-8";
116 
117 // Attempts to encode |terms| and |original_query| in |encoding| and escape
118 // them.  |terms| may be escaped as path or query depending on |is_in_query|;
119 // |original_query| is always escaped as query.  Returns whether the encoding
120 // process succeeded.
TryEncoding(const base::string16 & terms,const base::string16 & original_query,const char * encoding,bool is_in_query,base::string16 * escaped_terms,base::string16 * escaped_original_query)121 bool TryEncoding(const base::string16& terms,
122                  const base::string16& original_query,
123                  const char* encoding,
124                  bool is_in_query,
125                  base::string16* escaped_terms,
126                  base::string16* escaped_original_query) {
127   DCHECK(escaped_terms);
128   DCHECK(escaped_original_query);
129   std::string encoded_terms;
130   if (!base::UTF16ToCodepage(terms, encoding,
131       base::OnStringConversionError::SKIP, &encoded_terms))
132     return false;
133   *escaped_terms = base::UTF8ToUTF16(is_in_query ?
134       net::EscapeQueryParamValue(encoded_terms, true) :
135       net::EscapePath(encoded_terms));
136   if (original_query.empty())
137     return true;
138   std::string encoded_original_query;
139   if (!base::UTF16ToCodepage(original_query, encoding,
140       base::OnStringConversionError::SKIP, &encoded_original_query))
141     return false;
142   *escaped_original_query = base::UTF8ToUTF16(
143       net::EscapeQueryParamValue(encoded_original_query, true));
144   return true;
145 }
146 
147 // Extract query key and host given a list of parameters coming from the URL
148 // query or ref.
FindSearchTermsKey(const std::string & params)149 std::string FindSearchTermsKey(const std::string& params) {
150   if (params.empty())
151     return std::string();
152   url::Component query, key, value;
153   query.len = static_cast<int>(params.size());
154   while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) {
155     if (key.is_nonempty() && value.is_nonempty()) {
156       std::string value_string = params.substr(value.begin, value.len);
157       if (value_string.find(kSearchTermsParameterFull, 0) !=
158           std::string::npos ||
159           value_string.find(kGoogleUnescapedSearchTermsParameterFull, 0) !=
160           std::string::npos) {
161         return params.substr(key.begin, key.len);
162       }
163     }
164   }
165   return std::string();
166 }
167 
IsTemplateParameterString(const std::string & param)168 bool IsTemplateParameterString(const std::string& param) {
169   return (param.length() > 2) && (*(param.begin()) == kStartParameter) &&
170       (*(param.rbegin()) == kEndParameter);
171 }
172 
173 }  // namespace
174 
175 
176 // TemplateURLRef::SearchTermsArgs --------------------------------------------
177 
SearchTermsArgs(const base::string16 & search_terms)178 TemplateURLRef::SearchTermsArgs::SearchTermsArgs(
179     const base::string16& search_terms)
180     : search_terms(search_terms),
181       input_type(metrics::OmniboxInputType::INVALID),
182       accepted_suggestion(NO_SUGGESTIONS_AVAILABLE),
183       cursor_position(base::string16::npos),
184       enable_omnibox_start_margin(false),
185       page_classification(metrics::OmniboxEventProto::INVALID_SPEC),
186       bookmark_bar_pinned(false),
187       append_extra_query_params(false),
188       force_instant_results(false),
189       from_app_list(false),
190       contextual_search_params(ContextualSearchParams()) {
191 }
192 
~SearchTermsArgs()193 TemplateURLRef::SearchTermsArgs::~SearchTermsArgs() {
194 }
195 
196 TemplateURLRef::SearchTermsArgs::ContextualSearchParams::
ContextualSearchParams()197     ContextualSearchParams()
198     : version(-1),
199       start(base::string16::npos),
200       end(base::string16::npos),
201       resolve(true) {
202 }
203 
204 TemplateURLRef::SearchTermsArgs::ContextualSearchParams::
ContextualSearchParams(const int version,const std::string & selection,const std::string & base_page_url,const bool resolve)205     ContextualSearchParams(
206         const int version,
207         const std::string& selection,
208         const std::string& base_page_url,
209         const bool resolve)
210     : version(version),
211       start(base::string16::npos),
212       end(base::string16::npos),
213       selection(selection),
214       base_page_url(base_page_url),
215       resolve(resolve) {
216 }
217 
218 TemplateURLRef::SearchTermsArgs::ContextualSearchParams::
ContextualSearchParams(const int version,const size_t start,const size_t end,const std::string & selection,const std::string & content,const std::string & base_page_url,const std::string & encoding,const bool resolve)219     ContextualSearchParams(
220         const int version,
221         const size_t start,
222         const size_t end,
223         const std::string& selection,
224         const std::string& content,
225         const std::string& base_page_url,
226         const std::string& encoding,
227         const bool resolve)
228     : version(version),
229       start(start),
230       end(end),
231       selection(selection),
232       content(content),
233       base_page_url(base_page_url),
234       encoding(encoding),
235       resolve(resolve) {
236 }
237 
238 TemplateURLRef::SearchTermsArgs::ContextualSearchParams::
~ContextualSearchParams()239     ~ContextualSearchParams() {
240 }
241 
242 // TemplateURLRef -------------------------------------------------------------
243 
TemplateURLRef(TemplateURL * owner,Type type)244 TemplateURLRef::TemplateURLRef(TemplateURL* owner, Type type)
245     : owner_(owner),
246       type_(type),
247       index_in_owner_(0),
248       parsed_(false),
249       valid_(false),
250       supports_replacements_(false),
251       search_term_key_location_(url::Parsed::QUERY),
252       prepopulated_(false) {
253   DCHECK(owner_);
254   DCHECK_NE(INDEXED, type_);
255 }
256 
TemplateURLRef(TemplateURL * owner,size_t index_in_owner)257 TemplateURLRef::TemplateURLRef(TemplateURL* owner, size_t index_in_owner)
258     : owner_(owner),
259       type_(INDEXED),
260       index_in_owner_(index_in_owner),
261       parsed_(false),
262       valid_(false),
263       supports_replacements_(false),
264       search_term_key_location_(url::Parsed::QUERY),
265       prepopulated_(false) {
266   DCHECK(owner_);
267   DCHECK_LT(index_in_owner_, owner_->URLCount());
268 }
269 
~TemplateURLRef()270 TemplateURLRef::~TemplateURLRef() {
271 }
272 
GetURL() const273 std::string TemplateURLRef::GetURL() const {
274   switch (type_) {
275     case SEARCH:            return owner_->url();
276     case SUGGEST:           return owner_->suggestions_url();
277     case INSTANT:           return owner_->instant_url();
278     case IMAGE:             return owner_->image_url();
279     case NEW_TAB:           return owner_->new_tab_url();
280     case CONTEXTUAL_SEARCH: return owner_->contextual_search_url();
281     case INDEXED:           return owner_->GetURL(index_in_owner_);
282     default:       NOTREACHED(); return std::string();  // NOLINT
283   }
284 }
285 
GetPostParamsString() const286 std::string TemplateURLRef::GetPostParamsString() const {
287   switch (type_) {
288     case INDEXED:
289     case SEARCH:            return owner_->search_url_post_params();
290     case SUGGEST:           return owner_->suggestions_url_post_params();
291     case INSTANT:           return owner_->instant_url_post_params();
292     case NEW_TAB:           return std::string();
293     case CONTEXTUAL_SEARCH: return std::string();
294     case IMAGE:             return owner_->image_url_post_params();
295     default:      NOTREACHED(); return std::string();  // NOLINT
296   }
297 }
298 
UsesPOSTMethod(const SearchTermsData & search_terms_data) const299 bool TemplateURLRef::UsesPOSTMethod(
300     const SearchTermsData& search_terms_data) const {
301   ParseIfNecessary(search_terms_data);
302   return !post_params_.empty();
303 }
304 
EncodeFormData(const PostParams & post_params,PostContent * post_content) const305 bool TemplateURLRef::EncodeFormData(const PostParams& post_params,
306                                     PostContent* post_content) const {
307   if (post_params.empty())
308     return true;
309   if (!post_content)
310     return false;
311 
312   const char kUploadDataMIMEType[] = "multipart/form-data; boundary=";
313   const char kMultipartBoundary[] = "----+*+----%016" PRIx64 "----+*+----";
314   // Each name/value pair is stored in a body part which is preceded by a
315   // boundary delimiter line. Uses random number generator here to create
316   // a unique boundary delimiter for form data encoding.
317   std::string boundary = base::StringPrintf(kMultipartBoundary,
318                                             base::RandUint64());
319   // Sets the content MIME type.
320   post_content->first = kUploadDataMIMEType;
321   post_content->first += boundary;
322   // Encodes the post parameters.
323   std::string* post_data = &post_content->second;
324   post_data->clear();
325   for (PostParams::const_iterator param = post_params.begin();
326        param != post_params.end(); ++param) {
327     DCHECK(!param->first.empty());
328     net::AddMultipartValueForUpload(param->first, param->second, boundary,
329                                     std::string(), post_data);
330   }
331   net::AddMultipartFinalDelimiterForUpload(boundary, post_data);
332   return true;
333 }
334 
SupportsReplacement(const SearchTermsData & search_terms_data) const335 bool TemplateURLRef::SupportsReplacement(
336     const SearchTermsData& search_terms_data) const {
337   ParseIfNecessary(search_terms_data);
338   return valid_ && supports_replacements_;
339 }
340 
ReplaceSearchTerms(const SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data,PostContent * post_content) const341 std::string TemplateURLRef::ReplaceSearchTerms(
342     const SearchTermsArgs& search_terms_args,
343     const SearchTermsData& search_terms_data,
344     PostContent* post_content) const {
345   ParseIfNecessary(search_terms_data);
346   if (!valid_)
347     return std::string();
348 
349   std::string url(HandleReplacements(search_terms_args, search_terms_data,
350                                      post_content));
351 
352   GURL gurl(url);
353   if (!gurl.is_valid())
354     return url;
355 
356   std::vector<std::string> query_params;
357   if (search_terms_args.append_extra_query_params) {
358     std::string extra_params(
359         CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
360             switches::kExtraSearchQueryParams));
361     if (!extra_params.empty())
362       query_params.push_back(extra_params);
363   }
364   if (!search_terms_args.suggest_query_params.empty())
365     query_params.push_back(search_terms_args.suggest_query_params);
366   if (!gurl.query().empty())
367     query_params.push_back(gurl.query());
368 
369   if (query_params.empty())
370     return url;
371 
372   GURL::Replacements replacements;
373   std::string query_str = JoinString(query_params, "&");
374   replacements.SetQueryStr(query_str);
375   return gurl.ReplaceComponents(replacements).possibly_invalid_spec();
376 }
377 
IsValid(const SearchTermsData & search_terms_data) const378 bool TemplateURLRef::IsValid(const SearchTermsData& search_terms_data) const {
379   ParseIfNecessary(search_terms_data);
380   return valid_;
381 }
382 
DisplayURL(const SearchTermsData & search_terms_data) const383 base::string16 TemplateURLRef::DisplayURL(
384     const SearchTermsData& search_terms_data) const {
385   ParseIfNecessary(search_terms_data);
386   base::string16 result(base::UTF8ToUTF16(GetURL()));
387   if (valid_ && !replacements_.empty()) {
388     ReplaceSubstringsAfterOffset(&result, 0,
389                                  base::ASCIIToUTF16(kSearchTermsParameterFull),
390                                  base::ASCIIToUTF16(kDisplaySearchTerms));
391     ReplaceSubstringsAfterOffset(&result, 0,
392         base::ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull),
393         base::ASCIIToUTF16(kDisplayUnescapedSearchTerms));
394   }
395   return result;
396 }
397 
398 // static
DisplayURLToURLRef(const base::string16 & display_url)399 std::string TemplateURLRef::DisplayURLToURLRef(
400     const base::string16& display_url) {
401   base::string16 result = display_url;
402   ReplaceSubstringsAfterOffset(&result, 0,
403                                base::ASCIIToUTF16(kDisplaySearchTerms),
404                                base::ASCIIToUTF16(kSearchTermsParameterFull));
405   ReplaceSubstringsAfterOffset(
406       &result, 0,
407       base::ASCIIToUTF16(kDisplayUnescapedSearchTerms),
408       base::ASCIIToUTF16(kGoogleUnescapedSearchTermsParameterFull));
409   return base::UTF16ToUTF8(result);
410 }
411 
GetHost(const SearchTermsData & search_terms_data) const412 const std::string& TemplateURLRef::GetHost(
413     const SearchTermsData& search_terms_data) const {
414   ParseIfNecessary(search_terms_data);
415   return host_;
416 }
417 
GetPath(const SearchTermsData & search_terms_data) const418 const std::string& TemplateURLRef::GetPath(
419     const SearchTermsData& search_terms_data) const {
420   ParseIfNecessary(search_terms_data);
421   return path_;
422 }
423 
GetSearchTermKey(const SearchTermsData & search_terms_data) const424 const std::string& TemplateURLRef::GetSearchTermKey(
425     const SearchTermsData& search_terms_data) const {
426   ParseIfNecessary(search_terms_data);
427   return search_term_key_;
428 }
429 
SearchTermToString16(const std::string & term) const430 base::string16 TemplateURLRef::SearchTermToString16(
431     const std::string& term) const {
432   const std::vector<std::string>& encodings = owner_->input_encodings();
433   base::string16 result;
434 
435   std::string unescaped = net::UnescapeURLComponent(
436       term,
437       net::UnescapeRule::REPLACE_PLUS_WITH_SPACE |
438       net::UnescapeRule::URL_SPECIAL_CHARS);
439   for (size_t i = 0; i < encodings.size(); ++i) {
440     if (base::CodepageToUTF16(unescaped, encodings[i].c_str(),
441                               base::OnStringConversionError::FAIL, &result))
442       return result;
443   }
444 
445   // Always fall back on UTF-8 if it works.
446   if (base::CodepageToUTF16(unescaped, base::kCodepageUTF8,
447                             base::OnStringConversionError::FAIL, &result))
448     return result;
449 
450   // When nothing worked, just use the escaped text. We have no idea what the
451   // encoding is. We need to substitute spaces for pluses ourselves since we're
452   // not sending it through an unescaper.
453   result = base::UTF8ToUTF16(term);
454   std::replace(result.begin(), result.end(), '+', ' ');
455   return result;
456 }
457 
HasGoogleBaseURLs(const SearchTermsData & search_terms_data) const458 bool TemplateURLRef::HasGoogleBaseURLs(
459     const SearchTermsData& search_terms_data) const {
460   ParseIfNecessary(search_terms_data);
461   for (size_t i = 0; i < replacements_.size(); ++i) {
462     if ((replacements_[i].type == GOOGLE_BASE_URL) ||
463         (replacements_[i].type == GOOGLE_BASE_SUGGEST_URL))
464       return true;
465   }
466   return false;
467 }
468 
ExtractSearchTermsFromURL(const GURL & url,base::string16 * search_terms,const SearchTermsData & search_terms_data,url::Parsed::ComponentType * search_terms_component,url::Component * search_terms_position) const469 bool TemplateURLRef::ExtractSearchTermsFromURL(
470     const GURL& url,
471     base::string16* search_terms,
472     const SearchTermsData& search_terms_data,
473     url::Parsed::ComponentType* search_terms_component,
474     url::Component* search_terms_position) const {
475   DCHECK(search_terms);
476   search_terms->clear();
477 
478   ParseIfNecessary(search_terms_data);
479 
480   // We need a search term in the template URL to extract something.
481   if (search_term_key_.empty())
482     return false;
483 
484   // TODO(beaudoin): Support patterns of the form http://foo/{searchTerms}/
485   // See crbug.com/153798
486 
487   // Fill-in the replacements. We don't care about search terms in the pattern,
488   // so we use the empty string.
489   // Currently we assume the search term only shows in URL, not in post params.
490   GURL pattern(ReplaceSearchTerms(SearchTermsArgs(base::string16()),
491                                   search_terms_data, NULL));
492   // Host, path and port must match.
493   if (url.port() != pattern.port() ||
494       url.host() != host_ ||
495       url.path() != path_) {
496     return false;
497   }
498 
499   // Parameter must be present either in the query or the ref.
500   const std::string& params(
501       (search_term_key_location_ == url::Parsed::QUERY) ?
502           url.query() : url.ref());
503 
504   url::Component query, key, value;
505   query.len = static_cast<int>(params.size());
506   bool key_found = false;
507   while (url::ExtractQueryKeyValue(params.c_str(), &query, &key, &value)) {
508     if (key.is_nonempty()) {
509       if (params.substr(key.begin, key.len) == search_term_key_) {
510         // Fail if search term key is found twice.
511         if (key_found) {
512           search_terms->clear();
513           return false;
514         }
515         key_found = true;
516         // Extract the search term.
517         *search_terms = net::UnescapeAndDecodeUTF8URLComponent(
518             params.substr(value.begin, value.len),
519             net::UnescapeRule::SPACES |
520                 net::UnescapeRule::URL_SPECIAL_CHARS |
521                 net::UnescapeRule::REPLACE_PLUS_WITH_SPACE);
522         if (search_terms_component)
523           *search_terms_component = search_term_key_location_;
524         if (search_terms_position)
525           *search_terms_position = value;
526       }
527     }
528   }
529   return key_found;
530 }
531 
InvalidateCachedValues() const532 void TemplateURLRef::InvalidateCachedValues() const {
533   supports_replacements_ = valid_ = parsed_ = false;
534   host_.clear();
535   path_.clear();
536   search_term_key_.clear();
537   replacements_.clear();
538   post_params_.clear();
539 }
540 
// Parses the single template parameter that occupies [start, end] in |*url|,
// where |start| is the index of the opening brace and |end| the index of the
// closing brace.
//
// The parameter text (braces included) is erased from |*url|. Depending on
// the parameter name one of three things then happens:
//   * a Replacement recording the parameter type and |start| is appended to
//     |replacements|, to be filled in later;
//   * a constant value is inserted directly into |*url| at |start|; or
//   * nothing is inserted (e.g. optional count/outputEncoding/startIndex/
//     startPage parameters).
//
// Returns false only for an unrecognized parameter on a non-prepopulated URL,
// in which case the original text is put back into |*url|. Returns true in
// every other case, including unrecognized parameters on prepopulated URLs,
// which are simply dropped.
bool TemplateURLRef::ParseParameter(size_t start,
                                    size_t end,
                                    std::string* url,
                                    Replacements* replacements) const {
  DCHECK(start != std::string::npos &&
         end != std::string::npos && end > start);
  // Number of characters strictly between the two braces.
  size_t length = end - start - 1;
  bool optional = false;
  // A '?' immediately before the closing brace marks the parameter as optional
  // and is not part of its name.
  if ((*url)[end - 1] == kOptional) {
    optional = true;
    length--;
  }
  // |parameter| is the bare name; |full_parameter| is the exact original text
  // including braces, kept so it can be restored on failure.
  std::string parameter(url->substr(start + 1, length));
  std::string full_parameter(url->substr(start, end - start + 1));
  // Remove the parameter from the string.  For parameters whose replacement is
  // constant and already known, just replace them directly.  For other cases,
  // like parameters whose values may change over time, use |replacements|.
  url->erase(start, end - start + 1);
  if (parameter == kSearchTermsParameter) {
    replacements->push_back(Replacement(SEARCH_TERMS, start));
  } else if (parameter == kCountParameter) {
    if (!optional)
      url->insert(start, kDefaultCount);
  } else if (parameter == kGoogleAssistedQueryStatsParameter) {
    replacements->push_back(Replacement(GOOGLE_ASSISTED_QUERY_STATS, start));
  } else if (parameter == kGoogleBaseURLParameter) {
    replacements->push_back(Replacement(GOOGLE_BASE_URL, start));
  } else if (parameter == kGoogleBaseSuggestURLParameter) {
    replacements->push_back(Replacement(GOOGLE_BASE_SUGGEST_URL, start));
  } else if (parameter == kGoogleBookmarkBarPinnedParameter) {
    replacements->push_back(Replacement(GOOGLE_BOOKMARK_BAR_PINNED, start));
  } else if (parameter == kGoogleCurrentPageUrlParameter) {
    replacements->push_back(Replacement(GOOGLE_CURRENT_PAGE_URL, start));
  } else if (parameter == kGoogleCursorPositionParameter) {
    replacements->push_back(Replacement(GOOGLE_CURSOR_POSITION, start));
  } else if (parameter == kGoogleForceInstantResultsParameter) {
    replacements->push_back(Replacement(GOOGLE_FORCE_INSTANT_RESULTS, start));
  } else if (parameter == kGoogleImageOriginalHeight) {
    replacements->push_back(
        Replacement(TemplateURLRef::GOOGLE_IMAGE_ORIGINAL_HEIGHT, start));
  } else if (parameter == kGoogleImageOriginalWidth) {
    replacements->push_back(
        Replacement(TemplateURLRef::GOOGLE_IMAGE_ORIGINAL_WIDTH, start));
  } else if (parameter == kGoogleImageSearchSource) {
    replacements->push_back(
        Replacement(TemplateURLRef::GOOGLE_IMAGE_SEARCH_SOURCE, start));
  } else if (parameter == kGoogleImageThumbnailParameter) {
    replacements->push_back(
        Replacement(TemplateURLRef::GOOGLE_IMAGE_THUMBNAIL, start));
  } else if (parameter == kGoogleImageURLParameter) {
    replacements->push_back(Replacement(TemplateURLRef::GOOGLE_IMAGE_URL,
                                        start));
  } else if (parameter == kGoogleInputTypeParameter) {
    replacements->push_back(Replacement(TemplateURLRef::GOOGLE_INPUT_TYPE,
                                        start));
  } else if (parameter == kGoogleInstantExtendedEnabledParameter) {
    replacements->push_back(Replacement(GOOGLE_INSTANT_EXTENDED_ENABLED,
                                        start));
  } else if (parameter == kGoogleInstantExtendedEnabledKey) {
    // Constant value: substituted in place rather than deferred.
    url->insert(start, google_util::kInstantExtendedAPIParam);
  } else if (parameter == kGoogleNTPIsThemedParameter) {
    replacements->push_back(Replacement(GOOGLE_NTP_IS_THEMED, start));
  } else if (parameter == kGoogleOmniboxStartMarginParameter) {
    replacements->push_back(Replacement(GOOGLE_OMNIBOX_START_MARGIN, start));
  } else if (parameter == kGoogleContextualSearchVersion) {
    replacements->push_back(
        Replacement(GOOGLE_CONTEXTUAL_SEARCH_VERSION, start));
  } else if (parameter == kGoogleContextualSearchContextData) {
    replacements->push_back(
        Replacement(GOOGLE_CONTEXTUAL_SEARCH_CONTEXT_DATA, start));
  } else if (parameter == kGoogleOriginalQueryForSuggestionParameter) {
    replacements->push_back(Replacement(GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION,
                                        start));
  } else if (parameter == kGooglePageClassificationParameter) {
    replacements->push_back(Replacement(GOOGLE_PAGE_CLASSIFICATION, start));
  } else if (parameter == kGooglePrefetchQuery) {
    replacements->push_back(Replacement(GOOGLE_PREFETCH_QUERY, start));
  } else if (parameter == kGoogleRLZParameter) {
    replacements->push_back(Replacement(GOOGLE_RLZ, start));
  } else if (parameter == kGoogleSearchClient) {
    replacements->push_back(Replacement(GOOGLE_SEARCH_CLIENT, start));
  } else if (parameter == kGoogleSearchFieldtrialParameter) {
    replacements->push_back(Replacement(GOOGLE_SEARCH_FIELDTRIAL_GROUP, start));
  } else if (parameter == kGoogleSearchVersion) {
    replacements->push_back(Replacement(GOOGLE_SEARCH_VERSION, start));
  } else if (parameter == kGoogleSessionToken) {
    replacements->push_back(Replacement(GOOGLE_SESSION_TOKEN, start));
  } else if (parameter == kGoogleSourceIdParameter) {
    // Constant per platform: substituted in place.
#if defined(OS_ANDROID)
    url->insert(start, "sourceid=chrome-mobile&");
#else
    url->insert(start, "sourceid=chrome&");
#endif
  } else if (parameter == kGoogleSuggestAPIKeyParameter) {
    // The API key is escaped as a query parameter value and inserted in place.
    url->insert(start,
                net::EscapeQueryParamValue(google_apis::GetAPIKey(), false));
  } else if (parameter == kGoogleSuggestClient) {
    replacements->push_back(Replacement(GOOGLE_SUGGEST_CLIENT, start));
  } else if (parameter == kGoogleSuggestRequestId) {
    replacements->push_back(Replacement(GOOGLE_SUGGEST_REQUEST_ID, start));
  } else if (parameter == kGoogleUnescapedSearchTermsParameter) {
    replacements->push_back(Replacement(GOOGLE_UNESCAPED_SEARCH_TERMS, start));
  } else if (parameter == kInputEncodingParameter) {
    replacements->push_back(Replacement(ENCODING, start));
  } else if (parameter == kLanguageParameter) {
    replacements->push_back(Replacement(LANGUAGE, start));
  } else if (parameter == kOutputEncodingParameter) {
    if (!optional)
      url->insert(start, kOutputEncodingType);
  } else if ((parameter == kStartIndexParameter) ||
             (parameter == kStartPageParameter)) {
    // We don't support these.
    if (!optional)
      url->insert(start, "1");
  } else if (!prepopulated_) {
    // If it's a prepopulated URL, we know that it's safe to remove unknown
    // parameters, so just ignore this and return true below. Otherwise it could
    // be some garbage but can also be a javascript block. Put it back.
    url->insert(start, full_parameter);
    return false;
  }
  return true;
}
664 
ParseURL(const std::string & url,Replacements * replacements,PostParams * post_params,bool * valid) const665 std::string TemplateURLRef::ParseURL(const std::string& url,
666                                      Replacements* replacements,
667                                      PostParams* post_params,
668                                      bool* valid) const {
669   *valid = false;
670   std::string parsed_url = url;
671   for (size_t last = 0; last != std::string::npos; ) {
672     last = parsed_url.find(kStartParameter, last);
673     if (last != std::string::npos) {
674       size_t template_end = parsed_url.find(kEndParameter, last);
675       if (template_end != std::string::npos) {
676         // Since we allow Javascript in the URL, {} pairs could be nested. Match
677         // only leaf pairs with supported parameters.
678         size_t next_template_start = parsed_url.find(kStartParameter, last + 1);
679         if (next_template_start == std::string::npos ||
680             next_template_start > template_end) {
681           // If successful, ParseParameter erases from the string as such no
682           // need to update |last|. If failed, move |last| to the end of pair.
683           if (!ParseParameter(last, template_end, &parsed_url, replacements)) {
684             // |template_end| + 1 may be beyond the end of the string.
685             last = template_end;
686           }
687         } else {
688           last = next_template_start;
689         }
690       } else {
691         // Open brace without a closing brace, return.
692         return std::string();
693       }
694     }
695   }
696 
697   // Handles the post parameters.
698   const std::string& post_params_string = GetPostParamsString();
699   if (!post_params_string.empty()) {
700     typedef std::vector<std::string> Strings;
701     Strings param_list;
702     base::SplitString(post_params_string, ',', &param_list);
703 
704     for (Strings::const_iterator iterator = param_list.begin();
705          iterator != param_list.end(); ++iterator) {
706       Strings parts;
707       // The '=' delimiter is required and the name must be not empty.
708       base::SplitString(*iterator, '=', &parts);
709       if ((parts.size() != 2U) || parts[0].empty())
710         return std::string();
711 
712       std::string& value = parts[1];
713       size_t replacements_size = replacements->size();
714       if (IsTemplateParameterString(value))
715         ParseParameter(0, value.length() - 1, &value, replacements);
716       post_params->push_back(std::make_pair(parts[0], value));
717       // If there was a replacement added, points its index to last added
718       // PostParam.
719       if (replacements->size() > replacements_size) {
720         DCHECK_EQ(replacements_size + 1, replacements->size());
721         Replacement* r = &replacements->back();
722         r->is_post_param = true;
723         r->index = post_params->size() - 1;
724       }
725     }
726     DCHECK(!post_params->empty());
727   }
728 
729   *valid = true;
730   return parsed_url;
731 }
732 
ParseIfNecessary(const SearchTermsData & search_terms_data) const733 void TemplateURLRef::ParseIfNecessary(
734     const SearchTermsData& search_terms_data) const {
735   if (!parsed_) {
736     InvalidateCachedValues();
737     parsed_ = true;
738     parsed_url_ = ParseURL(GetURL(), &replacements_, &post_params_, &valid_);
739     supports_replacements_ = false;
740     if (valid_) {
741       bool has_only_one_search_term = false;
742       for (Replacements::const_iterator i = replacements_.begin();
743            i != replacements_.end(); ++i) {
744         if ((i->type == SEARCH_TERMS) ||
745             (i->type == GOOGLE_UNESCAPED_SEARCH_TERMS)) {
746           if (has_only_one_search_term) {
747             has_only_one_search_term = false;
748             break;
749           }
750           has_only_one_search_term = true;
751           supports_replacements_ = true;
752         }
753       }
754       // Only parse the host/key if there is one search term. Technically there
755       // could be more than one term, but it's uncommon; so we punt.
756       if (has_only_one_search_term)
757         ParseHostAndSearchTermKey(search_terms_data);
758     }
759   }
760 }
761 
ParseHostAndSearchTermKey(const SearchTermsData & search_terms_data) const762 void TemplateURLRef::ParseHostAndSearchTermKey(
763     const SearchTermsData& search_terms_data) const {
764   std::string url_string(GetURL());
765   ReplaceSubstringsAfterOffset(&url_string, 0,
766                                kGoogleBaseURLParameterFull,
767                                search_terms_data.GoogleBaseURLValue());
768   ReplaceSubstringsAfterOffset(&url_string, 0,
769                                kGoogleBaseSuggestURLParameterFull,
770                                search_terms_data.GoogleBaseSuggestURLValue());
771 
772   search_term_key_.clear();
773   host_.clear();
774   path_.clear();
775   search_term_key_location_ = url::Parsed::REF;
776 
777   GURL url(url_string);
778   if (!url.is_valid())
779     return;
780 
781   std::string query_key = FindSearchTermsKey(url.query());
782   std::string ref_key = FindSearchTermsKey(url.ref());
783   if (query_key.empty() == ref_key.empty())
784     return;  // No key or multiple keys found.  We only handle having one key.
785   search_term_key_ = query_key.empty() ? ref_key : query_key;
786   search_term_key_location_ =
787       query_key.empty() ? url::Parsed::REF : url::Parsed::QUERY;
788   host_ = url.host();
789   path_ = url.path();
790 }
791 
HandleReplacement(const std::string & name,const std::string & value,const Replacement & replacement,std::string * url) const792 void TemplateURLRef::HandleReplacement(const std::string& name,
793                                        const std::string& value,
794                                        const Replacement& replacement,
795                                        std::string* url) const {
796   size_t pos = replacement.index;
797   if (replacement.is_post_param) {
798     DCHECK_LT(pos, post_params_.size());
799     DCHECK(!post_params_[pos].first.empty());
800     post_params_[pos].second = value;
801   } else {
802     url->insert(pos, name.empty() ? value : (name + "=" + value + "&"));
803   }
804 }
805 
HandleReplacements(const SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data,PostContent * post_content) const806 std::string TemplateURLRef::HandleReplacements(
807     const SearchTermsArgs& search_terms_args,
808     const SearchTermsData& search_terms_data,
809     PostContent* post_content) const {
810   if (replacements_.empty()) {
811     if (!post_params_.empty())
812       EncodeFormData(post_params_, post_content);
813     return parsed_url_;
814   }
815 
816   // Determine if the search terms are in the query or before. We're escaping
817   // space as '+' in the former case and as '%20' in the latter case.
818   bool is_in_query = true;
819   for (Replacements::iterator i = replacements_.begin();
820        i != replacements_.end(); ++i) {
821     if (i->type == SEARCH_TERMS) {
822       base::string16::size_type query_start = parsed_url_.find('?');
823       is_in_query = query_start != base::string16::npos &&
824           (static_cast<base::string16::size_type>(i->index) > query_start);
825       break;
826     }
827   }
828 
829   std::string input_encoding;
830   base::string16 encoded_terms;
831   base::string16 encoded_original_query;
832   owner_->EncodeSearchTerms(search_terms_args, is_in_query, &input_encoding,
833                             &encoded_terms, &encoded_original_query);
834 
835   std::string url = parsed_url_;
836 
837   // replacements_ is ordered in ascending order, as such we need to iterate
838   // from the back.
839   for (Replacements::reverse_iterator i = replacements_.rbegin();
840        i != replacements_.rend(); ++i) {
841     switch (i->type) {
842       case ENCODING:
843         HandleReplacement(std::string(), input_encoding, *i, &url);
844         break;
845 
846       case GOOGLE_ASSISTED_QUERY_STATS:
847         DCHECK(!i->is_post_param);
848         if (!search_terms_args.assisted_query_stats.empty()) {
849           // Get the base URL without substituting AQS to avoid infinite
850           // recursion.  We need the URL to find out if it meets all
851           // AQS requirements (e.g. HTTPS protocol check).
852           // See TemplateURLRef::SearchTermsArgs for more details.
853           SearchTermsArgs search_terms_args_without_aqs(search_terms_args);
854           search_terms_args_without_aqs.assisted_query_stats.clear();
855           GURL base_url(ReplaceSearchTerms(
856               search_terms_args_without_aqs, search_terms_data, NULL));
857           if (base_url.SchemeIs(url::kHttpsScheme)) {
858             HandleReplacement(
859                 "aqs", search_terms_args.assisted_query_stats, *i, &url);
860           }
861         }
862         break;
863 
864       case GOOGLE_BASE_URL:
865         DCHECK(!i->is_post_param);
866         HandleReplacement(
867             std::string(), search_terms_data.GoogleBaseURLValue(), *i, &url);
868         break;
869 
870       case GOOGLE_BASE_SUGGEST_URL:
871         DCHECK(!i->is_post_param);
872         HandleReplacement(
873             std::string(), search_terms_data.GoogleBaseSuggestURLValue(), *i,
874             &url);
875         break;
876 
877       case GOOGLE_BOOKMARK_BAR_PINNED:
878         if (search_terms_data.IsShowingSearchTermsOnSearchResultsPages()) {
879           // Log whether the bookmark bar is pinned when the user is seeing
880           // InstantExtended on the SRP.
881           DCHECK(!i->is_post_param);
882           HandleReplacement(
883               "bmbp", search_terms_args.bookmark_bar_pinned ? "1" : "0", *i,
884               &url);
885         }
886         break;
887 
888       case GOOGLE_CURRENT_PAGE_URL:
889         DCHECK(!i->is_post_param);
890         if (!search_terms_args.current_page_url.empty()) {
891           const std::string& escaped_current_page_url =
892               net::EscapeQueryParamValue(search_terms_args.current_page_url,
893                                          true);
894           HandleReplacement("url", escaped_current_page_url, *i, &url);
895         }
896         break;
897 
898       case GOOGLE_CURSOR_POSITION:
899         DCHECK(!i->is_post_param);
900         if (search_terms_args.cursor_position != base::string16::npos)
901           HandleReplacement(
902               "cp",
903               base::StringPrintf("%" PRIuS, search_terms_args.cursor_position),
904               *i,
905               &url);
906         break;
907 
908       case GOOGLE_FORCE_INSTANT_RESULTS:
909         DCHECK(!i->is_post_param);
910         HandleReplacement(std::string(),
911                           search_terms_data.ForceInstantResultsParam(
912                               search_terms_args.force_instant_results),
913                           *i,
914                           &url);
915         break;
916 
917       case GOOGLE_INPUT_TYPE:
918         DCHECK(!i->is_post_param);
919         HandleReplacement(
920             "oit", base::IntToString(search_terms_args.input_type), *i, &url);
921         break;
922 
923       case GOOGLE_INSTANT_EXTENDED_ENABLED:
924         DCHECK(!i->is_post_param);
925         HandleReplacement(std::string(),
926                           search_terms_data.InstantExtendedEnabledParam(
927                               type_ == SEARCH),
928                           *i,
929                           &url);
930         break;
931 
932       case GOOGLE_NTP_IS_THEMED:
933         DCHECK(!i->is_post_param);
934         HandleReplacement(
935             std::string(), search_terms_data.NTPIsThemedParam(), *i, &url);
936         break;
937 
938       case GOOGLE_OMNIBOX_START_MARGIN:
939         DCHECK(!i->is_post_param);
940         if (search_terms_args.enable_omnibox_start_margin) {
941           int omnibox_start_margin = search_terms_data.OmniboxStartMargin();
942           if (omnibox_start_margin >= 0) {
943             HandleReplacement("es_sm", base::IntToString(omnibox_start_margin),
944                               *i, &url);
945           }
946         }
947         break;
948 
949       case GOOGLE_CONTEXTUAL_SEARCH_VERSION:
950         if (search_terms_args.contextual_search_params.version >= 0) {
951           HandleReplacement(
952               "ctxs",
953               base::IntToString(
954                   search_terms_args.contextual_search_params.version),
955               *i,
956               &url);
957         }
958         break;
959 
960       case GOOGLE_CONTEXTUAL_SEARCH_CONTEXT_DATA: {
961         DCHECK(!i->is_post_param);
962         std::string context_data;
963 
964         const SearchTermsArgs::ContextualSearchParams& params =
965             search_terms_args.contextual_search_params;
966 
967         if (params.start != std::string::npos) {
968           context_data.append("ctxs_start=" + base::IntToString(
969               params.start) + "&");
970         }
971 
972         if (params.end != std::string::npos) {
973           context_data.append("ctxs_end=" + base::IntToString(
974               params.end) + "&");
975         }
976 
977         if (!params.selection.empty())
978           context_data.append("q=" + params.selection + "&");
979 
980         if (!params.content.empty())
981           context_data.append("ctxs_content=" + params.content + "&");
982 
983         if (!params.base_page_url.empty())
984           context_data.append("ctxsl_url=" + params.base_page_url + "&");
985 
986         if (!params.encoding.empty()) {
987           context_data.append("ctxs_encoding=" + params.encoding + "&");
988         }
989 
990         context_data.append(
991             params.resolve ? "ctxsl_resolve=1" : "ctxsl_resolve=0");
992 
993         HandleReplacement(std::string(), context_data, *i, &url);
994         break;
995       }
996 
997       case GOOGLE_ORIGINAL_QUERY_FOR_SUGGESTION:
998         DCHECK(!i->is_post_param);
999         if (search_terms_args.accepted_suggestion >= 0 ||
1000             !search_terms_args.assisted_query_stats.empty()) {
1001           HandleReplacement(
1002               "oq", base::UTF16ToUTF8(encoded_original_query), *i, &url);
1003         }
1004         break;
1005 
1006       case GOOGLE_PAGE_CLASSIFICATION:
1007         if (search_terms_args.page_classification !=
1008             metrics::OmniboxEventProto::INVALID_SPEC) {
1009           HandleReplacement(
1010               "pgcl", base::IntToString(search_terms_args.page_classification),
1011               *i, &url);
1012         }
1013         break;
1014 
1015       case GOOGLE_PREFETCH_QUERY: {
1016         const std::string& query = search_terms_args.prefetch_query;
1017         const std::string& type = search_terms_args.prefetch_query_type;
1018         if (!query.empty() && !type.empty()) {
1019           HandleReplacement(
1020               std::string(), "pfq=" + query + "&qha=" + type + "&", *i, &url);
1021         }
1022         break;
1023       }
1024 
1025       case GOOGLE_RLZ: {
1026         DCHECK(!i->is_post_param);
1027         // On platforms that don't have RLZ, we still want this branch
1028         // to happen so that we replace the RLZ template with the
1029         // empty string.  (If we don't handle this case, we hit a
1030         // NOTREACHED below.)
1031         base::string16 rlz_string = search_terms_data.GetRlzParameterValue(
1032             search_terms_args.from_app_list);
1033         if (!rlz_string.empty()) {
1034           HandleReplacement("rlz", base::UTF16ToUTF8(rlz_string), *i, &url);
1035         }
1036         break;
1037       }
1038 
1039       case GOOGLE_SEARCH_CLIENT: {
1040         DCHECK(!i->is_post_param);
1041         std::string client = search_terms_data.GetSearchClient();
1042         if (!client.empty())
1043           HandleReplacement("client", client, *i, &url);
1044         break;
1045       }
1046 
1047       case GOOGLE_SEARCH_FIELDTRIAL_GROUP:
1048         // We are not currently running any fieldtrials that modulate the search
1049         // url.  If we do, then we'd have some conditional insert such as:
1050         // url.insert(i->index, used_www ? "gcx=w&" : "gcx=c&");
1051         break;
1052 
1053       case GOOGLE_SEARCH_VERSION:
1054         if (search_terms_data.EnableAnswersInSuggest())
1055           HandleReplacement("gs_rn", "42", *i, &url);
1056         break;
1057 
1058       case GOOGLE_SESSION_TOKEN: {
1059         std::string token = search_terms_args.session_token;
1060         if (!token.empty())
1061           HandleReplacement("psi", token, *i, &url);
1062         break;
1063       }
1064 
1065       case GOOGLE_SUGGEST_CLIENT:
1066         HandleReplacement(
1067             std::string(), search_terms_data.GetSuggestClient(), *i, &url);
1068         break;
1069 
1070       case GOOGLE_SUGGEST_REQUEST_ID:
1071         HandleReplacement(
1072             std::string(), search_terms_data.GetSuggestRequestIdentifier(), *i,
1073             &url);
1074         break;
1075 
1076       case GOOGLE_UNESCAPED_SEARCH_TERMS: {
1077         std::string unescaped_terms;
1078         base::UTF16ToCodepage(search_terms_args.search_terms,
1079                               input_encoding.c_str(),
1080                               base::OnStringConversionError::SKIP,
1081                               &unescaped_terms);
1082         HandleReplacement(std::string(), unescaped_terms, *i, &url);
1083         break;
1084       }
1085 
1086       case LANGUAGE:
1087         HandleReplacement(
1088             std::string(), search_terms_data.GetApplicationLocale(), *i, &url);
1089         break;
1090 
1091       case SEARCH_TERMS:
1092         HandleReplacement(
1093             std::string(), base::UTF16ToUTF8(encoded_terms), *i, &url);
1094         break;
1095 
1096       case GOOGLE_IMAGE_THUMBNAIL:
1097         HandleReplacement(
1098             std::string(), search_terms_args.image_thumbnail_content, *i, &url);
1099         break;
1100 
1101       case GOOGLE_IMAGE_URL:
1102         if (search_terms_args.image_url.is_valid()) {
1103           HandleReplacement(
1104               std::string(), search_terms_args.image_url.spec(), *i, &url);
1105         }
1106         break;
1107 
1108       case GOOGLE_IMAGE_ORIGINAL_WIDTH:
1109         if (!search_terms_args.image_original_size.IsEmpty()) {
1110           HandleReplacement(
1111               std::string(),
1112               base::IntToString(search_terms_args.image_original_size.width()),
1113               *i, &url);
1114         }
1115         break;
1116 
1117       case GOOGLE_IMAGE_ORIGINAL_HEIGHT:
1118         if (!search_terms_args.image_original_size.IsEmpty()) {
1119           HandleReplacement(
1120               std::string(),
1121               base::IntToString(search_terms_args.image_original_size.height()),
1122               *i, &url);
1123         }
1124         break;
1125 
1126       case GOOGLE_IMAGE_SEARCH_SOURCE:
1127         HandleReplacement(
1128             std::string(), search_terms_data.GoogleImageSearchSource(), *i,
1129             &url);
1130         break;
1131 
1132       default:
1133         NOTREACHED();
1134         break;
1135     }
1136   }
1137 
1138   if (!post_params_.empty())
1139     EncodeFormData(post_params_, post_content);
1140 
1141   return url;
1142 }
1143 
1144 
1145 // TemplateURL ----------------------------------------------------------------
1146 
AssociatedExtensionInfo(Type type,const std::string & extension_id)1147 TemplateURL::AssociatedExtensionInfo::AssociatedExtensionInfo(
1148     Type type,
1149     const std::string& extension_id)
1150     : type(type),
1151       extension_id(extension_id),
1152       wants_to_be_default_engine(false) {
1153   DCHECK_NE(NORMAL, type);
1154 }
1155 
~AssociatedExtensionInfo()1156 TemplateURL::AssociatedExtensionInfo::~AssociatedExtensionInfo() {
1157 }
1158 
TemplateURL(const TemplateURLData & data)1159 TemplateURL::TemplateURL(const TemplateURLData& data)
1160     : data_(data),
1161       url_ref_(this, TemplateURLRef::SEARCH),
1162       suggestions_url_ref_(this,
1163                            TemplateURLRef::SUGGEST),
1164       instant_url_ref_(this,
1165                        TemplateURLRef::INSTANT),
1166       image_url_ref_(this, TemplateURLRef::IMAGE),
1167       new_tab_url_ref_(this, TemplateURLRef::NEW_TAB),
1168       contextual_search_url_ref_(this, TemplateURLRef::CONTEXTUAL_SEARCH) {
1169   SetPrepopulateId(data_.prepopulate_id);
1170 
1171   if (data_.search_terms_replacement_key ==
1172       kGoogleInstantExtendedEnabledKeyFull) {
1173     data_.search_terms_replacement_key = google_util::kInstantExtendedAPIParam;
1174   }
1175 }
1176 
~TemplateURL()1177 TemplateURL::~TemplateURL() {
1178 }
1179 
1180 // static
GenerateKeyword(const GURL & url)1181 base::string16 TemplateURL::GenerateKeyword(const GURL& url) {
1182   DCHECK(url.is_valid());
1183   // Strip "www." off the front of the keyword; otherwise the keyword won't work
1184   // properly.  See http://code.google.com/p/chromium/issues/detail?id=6984 .
1185   // Special case: if the host was exactly "www." (not sure this can happen but
1186   // perhaps with some weird intranet and custom DNS server?), ensure we at
1187   // least don't return the empty string.
1188   base::string16 keyword(net::StripWWWFromHost(url));
1189   return keyword.empty() ? base::ASCIIToUTF16("www") : keyword;
1190 }
1191 
1192 // static
GenerateFaviconURL(const GURL & url)1193 GURL TemplateURL::GenerateFaviconURL(const GURL& url) {
1194   DCHECK(url.is_valid());
1195   GURL::Replacements rep;
1196 
1197   const char favicon_path[] = "/favicon.ico";
1198   int favicon_path_len = arraysize(favicon_path) - 1;
1199 
1200   rep.SetPath(favicon_path, url::Component(0, favicon_path_len));
1201   rep.ClearUsername();
1202   rep.ClearPassword();
1203   rep.ClearQuery();
1204   rep.ClearRef();
1205   return url.ReplaceComponents(rep);
1206 }
1207 
1208 // static
MatchesData(const TemplateURL * t_url,const TemplateURLData * data,const SearchTermsData & search_terms_data)1209 bool TemplateURL::MatchesData(const TemplateURL* t_url,
1210                               const TemplateURLData* data,
1211                               const SearchTermsData& search_terms_data) {
1212   if (!t_url || !data)
1213     return !t_url && !data;
1214 
1215   return (t_url->short_name() == data->short_name) &&
1216       t_url->HasSameKeywordAs(*data, search_terms_data) &&
1217       (t_url->url() == data->url()) &&
1218       (t_url->suggestions_url() == data->suggestions_url) &&
1219       (t_url->instant_url() == data->instant_url) &&
1220       (t_url->image_url() == data->image_url) &&
1221       (t_url->new_tab_url() == data->new_tab_url) &&
1222       (t_url->search_url_post_params() == data->search_url_post_params) &&
1223       (t_url->suggestions_url_post_params() ==
1224           data->suggestions_url_post_params) &&
1225       (t_url->instant_url_post_params() == data->instant_url_post_params) &&
1226       (t_url->image_url_post_params() == data->image_url_post_params) &&
1227       (t_url->favicon_url() == data->favicon_url) &&
1228       (t_url->safe_for_autoreplace() == data->safe_for_autoreplace) &&
1229       (t_url->show_in_default_list() == data->show_in_default_list) &&
1230       (t_url->input_encodings() == data->input_encodings) &&
1231       (t_url->alternate_urls() == data->alternate_urls) &&
1232       (t_url->search_terms_replacement_key() ==
1233           data->search_terms_replacement_key);
1234 }
1235 
AdjustedShortNameForLocaleDirection() const1236 base::string16 TemplateURL::AdjustedShortNameForLocaleDirection() const {
1237   base::string16 bidi_safe_short_name = data_.short_name;
1238   base::i18n::AdjustStringForLocaleDirection(&bidi_safe_short_name);
1239   return bidi_safe_short_name;
1240 }
1241 
ShowInDefaultList(const SearchTermsData & search_terms_data) const1242 bool TemplateURL::ShowInDefaultList(
1243     const SearchTermsData& search_terms_data) const {
1244   return data_.show_in_default_list &&
1245       url_ref_.SupportsReplacement(search_terms_data);
1246 }
1247 
SupportsReplacement(const SearchTermsData & search_terms_data) const1248 bool TemplateURL::SupportsReplacement(
1249     const SearchTermsData& search_terms_data) const {
1250   return url_ref_.SupportsReplacement(search_terms_data);
1251 }
1252 
HasGoogleBaseURLs(const SearchTermsData & search_terms_data) const1253 bool TemplateURL::HasGoogleBaseURLs(
1254     const SearchTermsData& search_terms_data) const {
1255   return url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1256       suggestions_url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1257       instant_url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1258       image_url_ref_.HasGoogleBaseURLs(search_terms_data) ||
1259       new_tab_url_ref_.HasGoogleBaseURLs(search_terms_data);
1260 }
1261 
IsGoogleSearchURLWithReplaceableKeyword(const SearchTermsData & search_terms_data) const1262 bool TemplateURL::IsGoogleSearchURLWithReplaceableKeyword(
1263     const SearchTermsData& search_terms_data) const {
1264   return (GetType() == NORMAL) &&
1265       url_ref_.HasGoogleBaseURLs(search_terms_data) &&
1266       google_util::IsGoogleHostname(base::UTF16ToUTF8(data_.keyword()),
1267                                     google_util::DISALLOW_SUBDOMAIN);
1268 }
1269 
HasSameKeywordAs(const TemplateURLData & other,const SearchTermsData & search_terms_data) const1270 bool TemplateURL::HasSameKeywordAs(
1271     const TemplateURLData& other,
1272     const SearchTermsData& search_terms_data) const {
1273   return (data_.keyword() == other.keyword()) ||
1274       (IsGoogleSearchURLWithReplaceableKeyword(search_terms_data) &&
1275        TemplateURL(other).IsGoogleSearchURLWithReplaceableKeyword(
1276            search_terms_data));
1277 }
1278 
GetType() const1279 TemplateURL::Type TemplateURL::GetType() const {
1280   return extension_info_ ? extension_info_->type : NORMAL;
1281 }
1282 
GetExtensionId() const1283 std::string TemplateURL::GetExtensionId() const {
1284   DCHECK(extension_info_);
1285   return extension_info_->extension_id;
1286 }
1287 
URLCount() const1288 size_t TemplateURL::URLCount() const {
1289   // Add 1 for the regular search URL.
1290   return data_.alternate_urls.size() + 1;
1291 }
1292 
GetURL(size_t index) const1293 const std::string& TemplateURL::GetURL(size_t index) const {
1294   DCHECK_LT(index, URLCount());
1295 
1296   return (index < data_.alternate_urls.size()) ?
1297       data_.alternate_urls[index] : url();
1298 }
1299 
ExtractSearchTermsFromURL(const GURL & url,const SearchTermsData & search_terms_data,base::string16 * search_terms)1300 bool TemplateURL::ExtractSearchTermsFromURL(
1301     const GURL& url,
1302     const SearchTermsData& search_terms_data,
1303     base::string16* search_terms) {
1304   return FindSearchTermsInURL(url, search_terms_data, search_terms, NULL, NULL);
1305 }
1306 
IsSearchURL(const GURL & url,const SearchTermsData & search_terms_data)1307 bool TemplateURL::IsSearchURL(
1308     const GURL& url,
1309     const SearchTermsData& search_terms_data) {
1310   base::string16 search_terms;
1311   return ExtractSearchTermsFromURL(url, search_terms_data, &search_terms) &&
1312       !search_terms.empty();
1313 }
1314 
HasSearchTermsReplacementKey(const GURL & url) const1315 bool TemplateURL::HasSearchTermsReplacementKey(const GURL& url) const {
1316   // Look for the key both in the query and the ref.
1317   std::string params[] = {url.query(), url.ref()};
1318 
1319   for (int i = 0; i < 2; ++i) {
1320     url::Component query, key, value;
1321     query.len = static_cast<int>(params[i].size());
1322     while (url::ExtractQueryKeyValue(params[i].c_str(), &query, &key, &value)) {
1323       if (key.is_nonempty() &&
1324           params[i].substr(key.begin, key.len) ==
1325               search_terms_replacement_key()) {
1326         return true;
1327       }
1328     }
1329   }
1330   return false;
1331 }
1332 
ReplaceSearchTermsInURL(const GURL & url,const TemplateURLRef::SearchTermsArgs & search_terms_args,const SearchTermsData & search_terms_data,GURL * result)1333 bool TemplateURL::ReplaceSearchTermsInURL(
1334     const GURL& url,
1335     const TemplateURLRef::SearchTermsArgs& search_terms_args,
1336     const SearchTermsData& search_terms_data,
1337     GURL* result) {
1338   // TODO(beaudoin): Use AQS from |search_terms_args| too.
1339   url::Parsed::ComponentType search_term_component;
1340   url::Component search_terms_position;
1341   base::string16 search_terms;
1342   if (!FindSearchTermsInURL(url, search_terms_data, &search_terms,
1343                             &search_term_component, &search_terms_position)) {
1344     return false;
1345   }
1346   DCHECK(search_terms_position.is_nonempty());
1347 
1348   // FindSearchTermsInURL only returns true for search terms in the query or
1349   // ref, so we can call EncodeSearchTerm with |is_in_query| = true, since query
1350   // and ref are encoded in the same way.
1351   std::string input_encoding;
1352   base::string16 encoded_terms;
1353   base::string16 encoded_original_query;
1354   EncodeSearchTerms(search_terms_args, true, &input_encoding,
1355                     &encoded_terms, &encoded_original_query);
1356 
1357   std::string old_params(
1358       (search_term_component == url::Parsed::REF) ? url.ref() : url.query());
1359   std::string new_params(old_params, 0, search_terms_position.begin);
1360   new_params += base::UTF16ToUTF8(search_terms_args.search_terms);
1361   new_params += old_params.substr(search_terms_position.end());
1362   url::StdStringReplacements<std::string> replacements;
1363   if (search_term_component == url::Parsed::REF)
1364     replacements.SetRefStr(new_params);
1365   else
1366     replacements.SetQueryStr(new_params);
1367   *result = url.ReplaceComponents(replacements);
1368   return true;
1369 }
1370 
EncodeSearchTerms(const TemplateURLRef::SearchTermsArgs & search_terms_args,bool is_in_query,std::string * input_encoding,base::string16 * encoded_terms,base::string16 * encoded_original_query) const1371 void TemplateURL::EncodeSearchTerms(
1372     const TemplateURLRef::SearchTermsArgs& search_terms_args,
1373     bool is_in_query,
1374     std::string* input_encoding,
1375     base::string16* encoded_terms,
1376     base::string16* encoded_original_query) const {
1377 
1378   std::vector<std::string> encodings(input_encodings());
1379   if (std::find(encodings.begin(), encodings.end(), "UTF-8") == encodings.end())
1380     encodings.push_back("UTF-8");
1381   for (std::vector<std::string>::const_iterator i(encodings.begin());
1382        i != encodings.end(); ++i) {
1383     if (TryEncoding(search_terms_args.search_terms,
1384                     search_terms_args.original_query, i->c_str(),
1385                     is_in_query, encoded_terms, encoded_original_query)) {
1386       *input_encoding = *i;
1387       return;
1388     }
1389   }
1390   NOTREACHED();
1391 }
1392 
GenerateSearchURL(const SearchTermsData & search_terms_data) const1393 GURL TemplateURL::GenerateSearchURL(
1394     const SearchTermsData& search_terms_data) const {
1395   if (!url_ref_.IsValid(search_terms_data))
1396     return GURL();
1397 
1398   if (!url_ref_.SupportsReplacement(search_terms_data))
1399     return GURL(url());
1400 
1401   // Use something obscure for the search terms argument so that in the rare
1402   // case the term replaces the URL it's unlikely another keyword would have the
1403   // same url.
1404   // TODO(jnd): Add additional parameters to get post data when the search URL
1405   // has post parameters.
1406   return GURL(url_ref_.ReplaceSearchTerms(
1407       TemplateURLRef::SearchTermsArgs(
1408           base::ASCIIToUTF16("blah.blah.blah.blah.blah")),
1409       search_terms_data, NULL));
1410 }
1411 
CopyFrom(const TemplateURL & other)1412 void TemplateURL::CopyFrom(const TemplateURL& other) {
1413   if (this == &other)
1414     return;
1415 
1416   data_ = other.data_;
1417   url_ref_.InvalidateCachedValues();
1418   suggestions_url_ref_.InvalidateCachedValues();
1419   instant_url_ref_.InvalidateCachedValues();
1420   SetPrepopulateId(other.data_.prepopulate_id);
1421 }
1422 
SetURL(const std::string & url)1423 void TemplateURL::SetURL(const std::string& url) {
1424   data_.SetURL(url);
1425   url_ref_.InvalidateCachedValues();
1426 }
1427 
SetPrepopulateId(int id)1428 void TemplateURL::SetPrepopulateId(int id) {
1429   data_.prepopulate_id = id;
1430   const bool prepopulated = id > 0;
1431   url_ref_.prepopulated_ = prepopulated;
1432   suggestions_url_ref_.prepopulated_ = prepopulated;
1433   instant_url_ref_.prepopulated_ = prepopulated;
1434 }
1435 
ResetKeywordIfNecessary(const SearchTermsData & search_terms_data,bool force)1436 void TemplateURL::ResetKeywordIfNecessary(
1437     const SearchTermsData& search_terms_data,
1438     bool force) {
1439   if (IsGoogleSearchURLWithReplaceableKeyword(search_terms_data) || force) {
1440     DCHECK(GetType() != OMNIBOX_API_EXTENSION);
1441     GURL url(GenerateSearchURL(search_terms_data));
1442     if (url.is_valid())
1443       data_.SetKeyword(GenerateKeyword(url));
1444   }
1445 }
1446 
FindSearchTermsInURL(const GURL & url,const SearchTermsData & search_terms_data,base::string16 * search_terms,url::Parsed::ComponentType * search_term_component,url::Component * search_terms_position)1447 bool TemplateURL::FindSearchTermsInURL(
1448     const GURL& url,
1449     const SearchTermsData& search_terms_data,
1450     base::string16* search_terms,
1451     url::Parsed::ComponentType* search_term_component,
1452     url::Component* search_terms_position) {
1453   DCHECK(search_terms);
1454   search_terms->clear();
1455 
1456   // Try to match with every pattern.
1457   for (size_t i = 0; i < URLCount(); ++i) {
1458     TemplateURLRef ref(this, i);
1459     if (ref.ExtractSearchTermsFromURL(url, search_terms, search_terms_data,
1460         search_term_component, search_terms_position)) {
1461       // If ExtractSearchTermsFromURL() returns true and |search_terms| is empty
1462       // it means the pattern matched but no search terms were present. In this
1463       // case we fail immediately without looking for matches in subsequent
1464       // patterns. This means that given patterns
1465       //    [ "http://foo/#q={searchTerms}", "http://foo/?q={searchTerms}" ],
1466       // calling ExtractSearchTermsFromURL() on "http://foo/?q=bar#q=' would
1467       // return false. This is important for at least Google, where such URLs
1468       // are invalid.
1469       return !search_terms->empty();
1470     }
1471   }
1472   return false;
1473 }
1474