• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "components/google/core/browser/google_util.h"
6 
7 #include <string>
8 #include <vector>
9 
10 #include "base/command_line.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "components/google/core/browser/google_switches.h"
17 #include "components/google/core/browser/google_url_tracker.h"
18 #include "components/url_fixer/url_fixer.h"
19 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
20 #include "net/base/url_util.h"
21 #include "url/gurl.h"
22 
// Only use Link Doctor on official builds.  It uses an API key, too, but it
// seems best to just disable it elsewhere, for more responsive error pages and
// to reduce server load.  Builds without GOOGLE_CHROME_BUILD therefore get an
// empty request URL.
#if defined(GOOGLE_CHROME_BUILD)
#define LINKDOCTOR_SERVER_REQUEST_URL "https://www.googleapis.com/rpc"
#else
#define LINKDOCTOR_SERVER_REQUEST_URL ""
#endif
31 
32 
// Helpers --------------------------------------------------------------------
34 
35 namespace {
36 
// When true, LinkDoctorBaseURL() returns a fixed mock URL instead of
// LINKDOCTOR_SERVER_REQUEST_URL. Only ever switched on (never back off) by
// SetMockLinkDoctorBaseURLForTesting().
bool gUseMockLinkDoctorBaseURLForTesting = false;
38 
// Returns true if |path| is exactly one of the base home page paths, "/" or
// "/webhp". The comparison is case-sensitive and no prefixes are accepted.
bool IsPathHomePageBase(const std::string& path) {
  return (path == "/") || (path == "/webhp");
}
42 
43 // True if |host| is "[www.]<domain_in_lower_case>.<TLD>" with a valid TLD. If
44 // |subdomain_permission| is ALLOW_SUBDOMAIN, we check against host
45 // "*.<domain_in_lower_case>.<TLD>" instead.
IsValidHostName(const std::string & host,const std::string & domain_in_lower_case,google_util::SubdomainPermission subdomain_permission)46 bool IsValidHostName(const std::string& host,
47                      const std::string& domain_in_lower_case,
48                      google_util::SubdomainPermission subdomain_permission) {
49   size_t tld_length = net::registry_controlled_domains::GetRegistryLength(
50       host,
51       net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
52       net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
53   if ((tld_length == 0) || (tld_length == std::string::npos))
54     return false;
55   // Removes the tld and the preceding dot.
56   std::string host_minus_tld(host, 0, host.length() - tld_length - 1);
57   if (LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case.c_str()))
58     return true;
59   if (subdomain_permission == google_util::ALLOW_SUBDOMAIN)
60     return EndsWith(host_minus_tld, "." + domain_in_lower_case, false);
61   return LowerCaseEqualsASCII(host_minus_tld,
62                               ("www." + domain_in_lower_case).c_str());
63 }
64 
65 // True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission|
66 // is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard
67 // port for its scheme (80 for HTTP, 443 for HTTPS).
IsValidURL(const GURL & url,google_util::PortPermission port_permission)68 bool IsValidURL(const GURL& url, google_util::PortPermission port_permission) {
69   return url.is_valid() && url.SchemeIsHTTPOrHTTPS() &&
70       (url.port().empty() ||
71        (port_permission == google_util::ALLOW_NON_STANDARD_PORTS));
72 }
73 
74 }  // namespace
75 
76 
77 namespace google_util {
78 
// Global functions -----------------------------------------------------------
80 
HasGoogleSearchQueryParam(const std::string & str)81 bool HasGoogleSearchQueryParam(const std::string& str) {
82   url::Component query(0, str.length()), key, value;
83   while (url::ExtractQueryKeyValue(str.c_str(), &query, &key, &value)) {
84     if ((key.len == 1) && (str[key.begin] == 'q') && value.is_nonempty())
85       return true;
86   }
87   return false;
88 }
89 
LinkDoctorBaseURL()90 GURL LinkDoctorBaseURL() {
91   if (gUseMockLinkDoctorBaseURLForTesting)
92     return GURL("http://mock.linkdoctor.url/for?testing");
93   return GURL(LINKDOCTOR_SERVER_REQUEST_URL);
94 }
95 
SetMockLinkDoctorBaseURLForTesting()96 void SetMockLinkDoctorBaseURLForTesting() {
97   gUseMockLinkDoctorBaseURLForTesting = true;
98 }
99 
// Maps |application_locale| to the locale string to send to Google. Every
// locale is returned unchanged except "nb", which becomes "no". The match is
// exact and case-sensitive.
std::string GetGoogleLocale(const std::string& application_locale) {
  // Google does not recognize "nb" for Norwegian Bokmal; it uses "no".
  return (application_locale == "nb") ? "no" : application_locale;
}
104 
// Returns |url| with an "hl" query parameter appended whose value is
// GetGoogleLocale(|application_locale|).
GURL AppendGoogleLocaleParam(const GURL& url,
                             const std::string& application_locale) {
  return net::AppendQueryParameter(
      url, "hl", GetGoogleLocale(application_locale));
}
110 
GetGoogleCountryCode(GURL google_homepage_url)111 std::string GetGoogleCountryCode(GURL google_homepage_url) {
112   const std::string google_hostname = google_homepage_url.host();
113   const size_t last_dot = google_hostname.find_last_of('.');
114   if (last_dot == std::string::npos) {
115     NOTREACHED();
116   }
117   std::string country_code = google_hostname.substr(last_dot + 1);
118   // Assume the com TLD implies the US.
119   if (country_code == "com")
120     return "us";
121   // Google uses the Unicode Common Locale Data Repository (CLDR), and the CLDR
122   // code for the UK is "gb".
123   if (country_code == "uk")
124     return "gb";
125   // Catalonia does not have a CLDR country code, since it's a region in Spain,
126   // so use Spain instead.
127   if (country_code == "cat")
128     return "es";
129   return country_code;
130 }
131 
GetGoogleSearchURL(GURL google_homepage_url)132 GURL GetGoogleSearchURL(GURL google_homepage_url) {
133   // To transform the homepage URL into the corresponding search URL, add the
134   // "search" and the "q=" query string.
135   std::string search_path = "search";
136   std::string query_string = "q=";
137   GURL::Replacements replacements;
138   replacements.SetPathStr(search_path);
139   replacements.SetQueryStr(query_string);
140   return google_homepage_url.ReplaceComponents(replacements);
141 }
142 
CommandLineGoogleBaseURL()143 GURL CommandLineGoogleBaseURL() {
144   // Unit tests may add command-line flags after the first call to this
145   // function, so we don't simply initialize a static |base_url| directly and
146   // then unconditionally return it.
147   CR_DEFINE_STATIC_LOCAL(std::string, switch_value, ());
148   CR_DEFINE_STATIC_LOCAL(GURL, base_url, ());
149   std::string current_switch_value(
150       CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
151           switches::kGoogleBaseURL));
152   if (current_switch_value != switch_value) {
153     switch_value = current_switch_value;
154     base_url = url_fixer::FixupURL(switch_value, std::string());
155     if (!base_url.is_valid() || base_url.has_query() || base_url.has_ref())
156       base_url = GURL();
157   }
158   return base_url;
159 }
160 
StartsWithCommandLineGoogleBaseURL(const GURL & url)161 bool StartsWithCommandLineGoogleBaseURL(const GURL& url) {
162   GURL base_url(CommandLineGoogleBaseURL());
163   return base_url.is_valid() &&
164       StartsWithASCII(url.possibly_invalid_spec(), base_url.spec(), true);
165 }
166 
IsGoogleHostname(const std::string & host,SubdomainPermission subdomain_permission)167 bool IsGoogleHostname(const std::string& host,
168                       SubdomainPermission subdomain_permission) {
169   GURL base_url(CommandLineGoogleBaseURL());
170   if (base_url.is_valid() && (host == base_url.host()))
171     return true;
172 
173   return IsValidHostName(host, "google", subdomain_permission);
174 }
175 
IsGoogleDomainUrl(const GURL & url,SubdomainPermission subdomain_permission,PortPermission port_permission)176 bool IsGoogleDomainUrl(const GURL& url,
177                        SubdomainPermission subdomain_permission,
178                        PortPermission port_permission) {
179   return IsValidURL(url, port_permission) &&
180       IsGoogleHostname(url.host(), subdomain_permission);
181 }
182 
IsGoogleHomePageUrl(const GURL & url)183 bool IsGoogleHomePageUrl(const GURL& url) {
184   // First check to see if this has a Google domain.
185   if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS))
186     return false;
187 
188   // Make sure the path is a known home page path.
189   std::string path(url.path());
190   return IsPathHomePageBase(path) || StartsWithASCII(path, "/ig", false);
191 }
192 
IsGoogleSearchUrl(const GURL & url)193 bool IsGoogleSearchUrl(const GURL& url) {
194   // First check to see if this has a Google domain.
195   if (!IsGoogleDomainUrl(url, DISALLOW_SUBDOMAIN, DISALLOW_NON_STANDARD_PORTS))
196     return false;
197 
198   // Make sure the path is a known search path.
199   std::string path(url.path());
200   bool is_home_page_base = IsPathHomePageBase(path);
201   if (!is_home_page_base && (path != "/search"))
202     return false;
203 
204   // Check for query parameter in URL parameter and hash fragment, depending on
205   // the path type.
206   return HasGoogleSearchQueryParam(url.ref()) ||
207       (!is_home_page_base && HasGoogleSearchQueryParam(url.query()));
208 }
209 
IsYoutubeDomainUrl(const GURL & url,SubdomainPermission subdomain_permission,PortPermission port_permission)210 bool IsYoutubeDomainUrl(const GURL& url,
211                         SubdomainPermission subdomain_permission,
212                         PortPermission port_permission) {
213   return IsValidURL(url, port_permission) &&
214       IsValidHostName(url.host(), "youtube", subdomain_permission);
215 }
216 
217 }  // namespace google_util
218