• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "url/scheme_host_port.h"
6 
7 #include <stdint.h>
8 #include <string.h>
9 
10 #include <ostream>
11 #include <string_view>
12 #include <tuple>
13 
14 #include "base/check_op.h"
15 #include "base/containers/contains.h"
16 #include "base/notreached.h"
17 #include "base/numerics/safe_conversions.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/trace_event/memory_usage_estimator.h"
20 #include "url/gurl.h"
21 #include "url/third_party/mozilla/url_parse.h"
22 #include "url/url_canon.h"
23 #include "url/url_canon_stdstring.h"
24 #include "url/url_constants.h"
25 #include "url/url_features.h"
26 #include "url/url_util.h"
27 
28 namespace url {
29 
30 namespace {
31 
IsCanonicalHost(std::string_view host,bool is_file_scheme)32 bool IsCanonicalHost(std::string_view host, bool is_file_scheme) {
33   std::string canon_host;
34 
35   // Try to canonicalize the host (copy/pasted from net/base. :( ).
36   const Component raw_host_component(0,
37                                      base::checked_cast<int>(host.length()));
38   StdStringCanonOutput canon_host_output(&canon_host);
39   CanonHostInfo host_info;
40   if (is_file_scheme) {
41     CanonicalizeFileHostVerbose(host.data(), raw_host_component,
42                                 canon_host_output, host_info);
43   } else {
44     CanonicalizeSpecialHostVerbose(host.data(), raw_host_component,
45                                    canon_host_output, host_info);
46   }
47 
48   if (host_info.out_host.is_nonempty() &&
49       host_info.family != CanonHostInfo::BROKEN) {
50     // Success!  Assert that there's no extra garbage.
51     canon_host_output.Complete();
52     DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
53   } else {
54     // Empty host, or canonicalization failed.
55     canon_host.clear();
56   }
57 
58   return host == canon_host;
59 }
60 
61 // Note: When changing IsValidInput, consider also updating
62 // ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
63 // behavior between these 2 layers, but we should avoid introducing new
64 // differences).
IsValidInput(std::string_view scheme,std::string_view host,uint16_t port,SchemeHostPort::ConstructPolicy policy)65 bool IsValidInput(std::string_view scheme,
66                   std::string_view host,
67                   uint16_t port,
68                   SchemeHostPort::ConstructPolicy policy) {
69   // Empty schemes are never valid.
70   if (scheme.empty())
71     return false;
72 
73   // about:blank and other no-access schemes translate into an opaque origin.
74   // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
75   if (base::Contains(GetNoAccessSchemes(), scheme))
76     return false;
77 
78   SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
79   bool is_standard = GetStandardSchemeType(
80       scheme.data(),
81       Component(0, base::checked_cast<int>(scheme.length())),
82       &scheme_type);
83   if (!is_standard) {
84     // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
85     // non-standard schemes are currently allowed to be tuple origins.
86     //
87     // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
88     // remove this local scheme exception.
89     if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
90       // If the flag is enabled, a host can be empty for non-special URLs.
91       // Therefore, we don't check a host nor port.
92       if (base::Contains(GetLocalSchemes(), scheme)) {
93         return true;
94       }
95     } else {
96       if (base::Contains(GetLocalSchemes(), scheme) && host.empty() &&
97           port == 0) {
98         return true;
99       }
100     }
101 
102     // Otherwise, allow non-standard schemes only if the Android WebView
103     // workaround is enabled.
104     return AllowNonStandardSchemesForAndroidWebView();
105   }
106 
107   switch (scheme_type) {
108     case SCHEME_WITH_HOST_AND_PORT:
109     case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
110       // A URL with |scheme| is required to have the host and port, so return an
111       // invalid instance if host is not given.  Note that a valid port is
112       // always provided by SchemeHostPort(const GURL&) constructor (a missing
113       // port is replaced with a default port if needed by
114       // GURL::EffectiveIntPort()).
115       if (host.empty())
116         return false;
117 
118       // Don't do an expensive canonicalization if the host is already
119       // canonicalized.
120       DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
121              IsCanonicalHost(host, scheme == url::kFileScheme));
122       if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
123           !IsCanonicalHost(host, scheme == url::kFileScheme)) {
124         return false;
125       }
126 
127       return true;
128 
129     case SCHEME_WITH_HOST:
130       if (port != 0) {
131         // Return an invalid object if a URL with the scheme never represents
132         // the port data but the given |port| is non-zero.
133         return false;
134       }
135 
136       // Don't do an expensive canonicalization if the host is already
137       // canonicalized.
138       DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
139              IsCanonicalHost(host, scheme == url::kFileScheme));
140       if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
141           !IsCanonicalHost(host, scheme == url::kFileScheme)) {
142         return false;
143       }
144 
145       return true;
146 
147     case SCHEME_WITHOUT_AUTHORITY:
148       return false;
149 
150     default:
151       NOTREACHED();
152   }
153 }
154 
155 }  // namespace
156 
// Default constructor; compiler-generated. The initial member values come
// from the class definition, which is not visible in this file — presumably
// an empty scheme/host and port 0, i.e. an invalid tuple (TODO confirm
// against the header).
SchemeHostPort::SchemeHostPort() = default;
158 
SchemeHostPort(std::string scheme,std::string host,uint16_t port,ConstructPolicy policy)159 SchemeHostPort::SchemeHostPort(std::string scheme,
160                                std::string host,
161                                uint16_t port,
162                                ConstructPolicy policy) {
163   if (ShouldDiscardHostAndPort(scheme)) {
164     host = "";
165     port = 0;
166   }
167 
168   if (!IsValidInput(scheme, host, port, policy)) {
169     DCHECK(!IsValid());
170     return;
171   }
172 
173   scheme_ = std::move(scheme);
174   host_ = std::move(host);
175   port_ = port;
176   DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
177                     << " Port: " << port;
178 }
179 
SchemeHostPort(std::string_view scheme,std::string_view host,uint16_t port)180 SchemeHostPort::SchemeHostPort(std::string_view scheme,
181                                std::string_view host,
182                                uint16_t port)
183     : SchemeHostPort(std::string(scheme),
184                      std::string(host),
185                      port,
186                      ConstructPolicy::CHECK_CANONICALIZATION) {}
187 
SchemeHostPort(const GURL & url)188 SchemeHostPort::SchemeHostPort(const GURL& url) {
189   if (!url.is_valid())
190     return;
191 
192   std::string_view scheme = url.scheme_piece();
193   std::string_view host = url.host_piece();
194 
195   // A valid GURL never returns PORT_INVALID.
196   int port = url.EffectiveIntPort();
197   if (port == PORT_UNSPECIFIED) {
198     port = 0;
199   } else {
200     DCHECK_GE(port, 0);
201     DCHECK_LE(port, 65535);
202   }
203 
204   if (ShouldDiscardHostAndPort(scheme)) {
205     host = "";
206     port = 0;
207   }
208 
209   if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
210     return;
211 
212   scheme_ = std::string(scheme);
213   host_ = std::string(host);
214   port_ = port;
215 }
216 
// Destructor; compiler-generated, defined out of line in this file.
SchemeHostPort::~SchemeHostPort() = default;
218 
IsValid() const219 bool SchemeHostPort::IsValid() const {
220   // It suffices to just check |scheme_| for emptiness; the other fields are
221   // never present without it.
222   DCHECK(!scheme_.empty() || host_.empty());
223   DCHECK(!scheme_.empty() || port_ == 0);
224   return !scheme_.empty();
225 }
226 
Serialize() const227 std::string SchemeHostPort::Serialize() const {
228   // Null checking for |parsed| in SerializeInternal is probably slower than
229   // just filling it in and discarding it here.
230   url::Parsed parsed;
231   return SerializeInternal(&parsed);
232 }
233 
GetURL() const234 GURL SchemeHostPort::GetURL() const {
235   url::Parsed parsed;
236   std::string serialized = SerializeInternal(&parsed);
237 
238   if (!IsValid())
239     return GURL(std::move(serialized), parsed, false);
240 
241   // SchemeHostPort does not have enough information to determine if an empty
242   // host is valid or not for the given scheme. Force re-parsing.
243   DCHECK(!scheme_.empty());
244   if (host_.empty())
245     return GURL(serialized);
246 
247   // If the serialized string is passed to GURL for parsing, it will append an
248   // empty path "/" for standard URLs. Add that here. Note: per RFC 6454 we
249   // cannot do this for normal Origin serialization.
250   DCHECK(!parsed.path.is_valid());
251   if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
252     // Append "/" only if the URL is standard. If the flag is enabled,
253     // non-special URLs can have an empty path and GURL doesn't append "/" to
254     // that.
255     if (IsStandardScheme(scheme_)) {
256       parsed.path = Component(serialized.length(), 1);
257       serialized.append("/");
258     }
259   } else {
260     parsed.path = Component(serialized.length(), 1);
261     serialized.append("/");
262   }
263   return GURL(std::move(serialized), parsed, true);
264 }
265 
EstimateMemoryUsage() const266 size_t SchemeHostPort::EstimateMemoryUsage() const {
267   return base::trace_event::EstimateMemoryUsage(scheme_) +
268          base::trace_event::EstimateMemoryUsage(host_);
269 }
270 
operator <(const SchemeHostPort & other) const271 bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
272   return std::tie(port_, scheme_, host_) <
273          std::tie(other.port_, other.scheme_, other.host_);
274 }
275 
SerializeInternal(url::Parsed * parsed) const276 std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
277   std::string result;
278   if (!IsValid())
279     return result;
280 
281   // Reserve enough space for the "normal" case of scheme://host/.
282   result.reserve(scheme_.size() + host_.size() + 4);
283 
284   if (!scheme_.empty()) {
285     parsed->scheme = Component(0, scheme_.length());
286     result.append(scheme_);
287   }
288 
289   result.append(kStandardSchemeSeparator);
290 
291   if (!host_.empty()) {
292     parsed->host = Component(result.length(), host_.length());
293     result.append(host_);
294   }
295 
296   // Omit the port component if the port matches with the default port
297   // defined for the scheme, if any.
298   int default_port = DefaultPortForScheme(scheme_);
299   if (default_port == PORT_UNSPECIFIED)
300     return result;
301   if (port_ != default_port) {
302     result.push_back(':');
303     std::string port(base::NumberToString(port_));
304     parsed->port = Component(result.length(), port.length());
305     result.append(std::move(port));
306   }
307 
308   return result;
309 }
310 
ShouldDiscardHostAndPort(std::string_view scheme)311 bool SchemeHostPort::ShouldDiscardHostAndPort(std::string_view scheme) {
312   return IsAndroidWebViewHackEnabledScheme(scheme) &&
313          IsUsingStandardCompliantNonSpecialSchemeURLParsing();
314 }
315 
operator <<(std::ostream & out,const SchemeHostPort & scheme_host_port)316 std::ostream& operator<<(std::ostream& out,
317                          const SchemeHostPort& scheme_host_port) {
318   return out << scheme_host_port.Serialize();
319 }
320 
321 }  // namespace url
322