• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "url/scheme_host_port.h"
6 
7 #include <stdint.h>
8 #include <string.h>
9 
10 #include <ostream>
11 #include <tuple>
12 
13 #include "base/check_op.h"
14 #include "base/containers/contains.h"
15 #include "base/notreached.h"
16 #include "base/numerics/safe_conversions.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_piece.h"
19 #include "url/gurl.h"
20 #include "url/third_party/mozilla/url_parse.h"
21 #include "url/url_canon.h"
22 #include "url/url_canon_stdstring.h"
23 #include "url/url_constants.h"
24 #include "url/url_util.h"
25 
26 namespace url {
27 
28 namespace {
29 
IsCanonicalHost(const base::StringPiece & host)30 bool IsCanonicalHost(const base::StringPiece& host) {
31   std::string canon_host;
32 
33   // Try to canonicalize the host (copy/pasted from net/base. :( ).
34   const Component raw_host_component(0,
35                                      base::checked_cast<int>(host.length()));
36   StdStringCanonOutput canon_host_output(&canon_host);
37   CanonHostInfo host_info;
38   CanonicalizeHostVerbose(host.data(), raw_host_component,
39                           &canon_host_output, &host_info);
40 
41   if (host_info.out_host.is_nonempty() &&
42       host_info.family != CanonHostInfo::BROKEN) {
43     // Success!  Assert that there's no extra garbage.
44     canon_host_output.Complete();
45     DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
46   } else {
47     // Empty host, or canonicalization failed.
48     canon_host.clear();
49   }
50 
51   return host == canon_host;
52 }
53 
54 // Note: When changing IsValidInput, consider also updating
55 // ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
56 // behavior between these 2 layers, but we should avoid introducing new
57 // differences).
IsValidInput(const base::StringPiece & scheme,const base::StringPiece & host,uint16_t port,SchemeHostPort::ConstructPolicy policy)58 bool IsValidInput(const base::StringPiece& scheme,
59                   const base::StringPiece& host,
60                   uint16_t port,
61                   SchemeHostPort::ConstructPolicy policy) {
62   // Empty schemes are never valid.
63   if (scheme.empty())
64     return false;
65 
66   // about:blank and other no-access schemes translate into an opaque origin.
67   // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
68   if (base::Contains(GetNoAccessSchemes(), scheme))
69     return false;
70 
71   SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
72   bool is_standard = GetStandardSchemeType(
73       scheme.data(),
74       Component(0, base::checked_cast<int>(scheme.length())),
75       &scheme_type);
76   if (!is_standard) {
77     // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
78     // non-standard schemes are currently allowed to be tuple origins.
79     // Nonstandard schemes don't have hostnames, so their tuple is just
80     // ("protocol", "", 0).
81     //
82     // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
83     // remove this local scheme exception.
84     if (base::Contains(GetLocalSchemes(), scheme) && host.empty() && port == 0)
85       return true;
86 
87     // Otherwise, allow non-standard schemes only if the Android WebView
88     // workaround is enabled.
89     return AllowNonStandardSchemesForAndroidWebView();
90   }
91 
92   switch (scheme_type) {
93     case SCHEME_WITH_HOST_AND_PORT:
94     case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
95       // A URL with |scheme| is required to have the host and port, so return an
96       // invalid instance if host is not given.  Note that a valid port is
97       // always provided by SchemeHostPort(const GURL&) constructor (a missing
98       // port is replaced with a default port if needed by
99       // GURL::EffectiveIntPort()).
100       if (host.empty())
101         return false;
102 
103       // Don't do an expensive canonicalization if the host is already
104       // canonicalized.
105       DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
106              IsCanonicalHost(host));
107       if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
108           !IsCanonicalHost(host)) {
109         return false;
110       }
111 
112       return true;
113 
114     case SCHEME_WITH_HOST:
115       if (port != 0) {
116         // Return an invalid object if a URL with the scheme never represents
117         // the port data but the given |port| is non-zero.
118         return false;
119       }
120 
121       // Don't do an expensive canonicalization if the host is already
122       // canonicalized.
123       DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
124              IsCanonicalHost(host));
125       if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
126           !IsCanonicalHost(host)) {
127         return false;
128       }
129 
130       return true;
131 
132     case SCHEME_WITHOUT_AUTHORITY:
133       return false;
134 
135     default:
136       NOTREACHED();
137       return false;
138   }
139 }
140 
141 }  // namespace
142 
// Compiler-generated default construction. Presumably this yields the invalid
// (empty-scheme) instance; the member initializers live in the header, which
// is not visible here -- TODO confirm.
143 SchemeHostPort::SchemeHostPort() = default;
144 
SchemeHostPort(std::string scheme,std::string host,uint16_t port,ConstructPolicy policy)145 SchemeHostPort::SchemeHostPort(std::string scheme,
146                                std::string host,
147                                uint16_t port,
148                                ConstructPolicy policy) {
149   if (!IsValidInput(scheme, host, port, policy)) {
150     DCHECK(!IsValid());
151     return;
152   }
153 
154   scheme_ = std::move(scheme);
155   host_ = std::move(host);
156   port_ = port;
157   DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
158                     << " Port: " << port;
159 }
160 
SchemeHostPort(base::StringPiece scheme,base::StringPiece host,uint16_t port)161 SchemeHostPort::SchemeHostPort(base::StringPiece scheme,
162                                base::StringPiece host,
163                                uint16_t port)
164     : SchemeHostPort(std::string(scheme),
165                      std::string(host),
166                      port,
167                      ConstructPolicy::CHECK_CANONICALIZATION) {}
168 
SchemeHostPort(const GURL & url)169 SchemeHostPort::SchemeHostPort(const GURL& url) {
170   if (!url.is_valid())
171     return;
172 
173   base::StringPiece scheme = url.scheme_piece();
174   base::StringPiece host = url.host_piece();
175 
176   // A valid GURL never returns PORT_INVALID.
177   int port = url.EffectiveIntPort();
178   if (port == PORT_UNSPECIFIED) {
179     port = 0;
180   } else {
181     DCHECK_GE(port, 0);
182     DCHECK_LE(port, 65535);
183   }
184 
185   if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
186     return;
187 
188   scheme_ = std::string(scheme);
189   host_ = std::string(host);
190   port_ = port;
191 }
192 
// Compiler-generated destructor, defined out of line in this file.
193 SchemeHostPort::~SchemeHostPort() = default;
194 
IsValid() const195 bool SchemeHostPort::IsValid() const {
196   // It suffices to just check |scheme_| for emptiness; the other fields are
197   // never present without it.
198   DCHECK(!scheme_.empty() || host_.empty());
199   DCHECK(!scheme_.empty() || port_ == 0);
200   return !scheme_.empty();
201 }
202 
Serialize() const203 std::string SchemeHostPort::Serialize() const {
204   // Null checking for |parsed| in SerializeInternal is probably slower than
205   // just filling it in and discarding it here.
206   url::Parsed parsed;
207   return SerializeInternal(&parsed);
208 }
209 
GetURL() const210 GURL SchemeHostPort::GetURL() const {
211   url::Parsed parsed;
212   std::string serialized = SerializeInternal(&parsed);
213 
214   if (!IsValid())
215     return GURL(std::move(serialized), parsed, false);
216 
217   // SchemeHostPort does not have enough information to determine if an empty
218   // host is valid or not for the given scheme. Force re-parsing.
219   DCHECK(!scheme_.empty());
220   if (host_.empty())
221     return GURL(serialized);
222 
223   // If the serialized string is passed to GURL for parsing, it will append an
224   // empty path "/". Add that here. Note: per RFC 6454 we cannot do this for
225   // normal Origin serialization.
226   DCHECK(!parsed.path.is_valid());
227   parsed.path = Component(serialized.length(), 1);
228   serialized.append("/");
229   return GURL(std::move(serialized), parsed, true);
230 }
231 
operator <(const SchemeHostPort & other) const232 bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
233   return std::tie(port_, scheme_, host_) <
234          std::tie(other.port_, other.scheme_, other.host_);
235 }
236 
SerializeInternal(url::Parsed * parsed) const237 std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
238   std::string result;
239   if (!IsValid())
240     return result;
241 
242   // Reserve enough space for the "normal" case of scheme://host/.
243   result.reserve(scheme_.size() + host_.size() + 4);
244 
245   if (!scheme_.empty()) {
246     parsed->scheme = Component(0, scheme_.length());
247     result.append(scheme_);
248   }
249 
250   result.append(kStandardSchemeSeparator);
251 
252   if (!host_.empty()) {
253     parsed->host = Component(result.length(), host_.length());
254     result.append(host_);
255   }
256 
257   // Omit the port component if the port matches with the default port
258   // defined for the scheme, if any.
259   int default_port = DefaultPortForScheme(scheme_.data(),
260                                           static_cast<int>(scheme_.length()));
261   if (default_port == PORT_UNSPECIFIED)
262     return result;
263   if (port_ != default_port) {
264     result.push_back(':');
265     std::string port(base::NumberToString(port_));
266     parsed->port = Component(result.length(), port.length());
267     result.append(std::move(port));
268   }
269 
270   return result;
271 }
272 
operator <<(std::ostream & out,const SchemeHostPort & scheme_host_port)273 std::ostream& operator<<(std::ostream& out,
274                          const SchemeHostPort& scheme_host_port) {
275   return out << scheme_host_port.Serialize();
276 }
277 
278 }  // namespace url
279