1 // Copyright 2015 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "url/scheme_host_port.h"
6
7 #include <stdint.h>
8 #include <string.h>
9
10 #include <ostream>
11 #include <string_view>
12 #include <tuple>
13
14 #include "base/check_op.h"
15 #include "base/containers/contains.h"
16 #include "base/notreached.h"
17 #include "base/numerics/safe_conversions.h"
18 #include "base/strings/string_number_conversions.h"
19 #include "base/trace_event/memory_usage_estimator.h"
20 #include "url/gurl.h"
21 #include "url/third_party/mozilla/url_parse.h"
22 #include "url/url_canon.h"
23 #include "url/url_canon_stdstring.h"
24 #include "url/url_constants.h"
25 #include "url/url_features.h"
26 #include "url/url_util.h"
27
28 namespace url {
29
30 namespace {
31
IsCanonicalHost(std::string_view host,bool is_file_scheme)32 bool IsCanonicalHost(std::string_view host, bool is_file_scheme) {
33 std::string canon_host;
34
35 // Try to canonicalize the host (copy/pasted from net/base. :( ).
36 const Component raw_host_component(0,
37 base::checked_cast<int>(host.length()));
38 StdStringCanonOutput canon_host_output(&canon_host);
39 CanonHostInfo host_info;
40 if (is_file_scheme) {
41 CanonicalizeFileHostVerbose(host.data(), raw_host_component,
42 canon_host_output, host_info);
43 } else {
44 CanonicalizeSpecialHostVerbose(host.data(), raw_host_component,
45 canon_host_output, host_info);
46 }
47
48 if (host_info.out_host.is_nonempty() &&
49 host_info.family != CanonHostInfo::BROKEN) {
50 // Success! Assert that there's no extra garbage.
51 canon_host_output.Complete();
52 DCHECK_EQ(host_info.out_host.len, static_cast<int>(canon_host.length()));
53 } else {
54 // Empty host, or canonicalization failed.
55 canon_host.clear();
56 }
57
58 return host == canon_host;
59 }
60
61 // Note: When changing IsValidInput, consider also updating
62 // ShouldTreatAsOpaqueOrigin in Blink (there might be existing differences in
63 // behavior between these 2 layers, but we should avoid introducing new
64 // differences).
IsValidInput(std::string_view scheme,std::string_view host,uint16_t port,SchemeHostPort::ConstructPolicy policy)65 bool IsValidInput(std::string_view scheme,
66 std::string_view host,
67 uint16_t port,
68 SchemeHostPort::ConstructPolicy policy) {
69 // Empty schemes are never valid.
70 if (scheme.empty())
71 return false;
72
73 // about:blank and other no-access schemes translate into an opaque origin.
74 // This helps consistency with ShouldTreatAsOpaqueOrigin in Blink.
75 if (base::Contains(GetNoAccessSchemes(), scheme))
76 return false;
77
78 SchemeType scheme_type = SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION;
79 bool is_standard = GetStandardSchemeType(
80 scheme.data(),
81 Component(0, base::checked_cast<int>(scheme.length())),
82 &scheme_type);
83 if (!is_standard) {
84 // To be consistent with ShouldTreatAsOpaqueOrigin in Blink, local
85 // non-standard schemes are currently allowed to be tuple origins.
86 //
87 // TODO: Migrate "content:" and "externalfile:" to be standard schemes, and
88 // remove this local scheme exception.
89 if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
90 // If the flag is enabled, a host can be empty for non-special URLs.
91 // Therefore, we don't check a host nor port.
92 if (base::Contains(GetLocalSchemes(), scheme)) {
93 return true;
94 }
95 } else {
96 if (base::Contains(GetLocalSchemes(), scheme) && host.empty() &&
97 port == 0) {
98 return true;
99 }
100 }
101
102 // Otherwise, allow non-standard schemes only if the Android WebView
103 // workaround is enabled.
104 return AllowNonStandardSchemesForAndroidWebView();
105 }
106
107 switch (scheme_type) {
108 case SCHEME_WITH_HOST_AND_PORT:
109 case SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION:
110 // A URL with |scheme| is required to have the host and port, so return an
111 // invalid instance if host is not given. Note that a valid port is
112 // always provided by SchemeHostPort(const GURL&) constructor (a missing
113 // port is replaced with a default port if needed by
114 // GURL::EffectiveIntPort()).
115 if (host.empty())
116 return false;
117
118 // Don't do an expensive canonicalization if the host is already
119 // canonicalized.
120 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
121 IsCanonicalHost(host, scheme == url::kFileScheme));
122 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
123 !IsCanonicalHost(host, scheme == url::kFileScheme)) {
124 return false;
125 }
126
127 return true;
128
129 case SCHEME_WITH_HOST:
130 if (port != 0) {
131 // Return an invalid object if a URL with the scheme never represents
132 // the port data but the given |port| is non-zero.
133 return false;
134 }
135
136 // Don't do an expensive canonicalization if the host is already
137 // canonicalized.
138 DCHECK(policy == SchemeHostPort::CHECK_CANONICALIZATION ||
139 IsCanonicalHost(host, scheme == url::kFileScheme));
140 if (policy == SchemeHostPort::CHECK_CANONICALIZATION &&
141 !IsCanonicalHost(host, scheme == url::kFileScheme)) {
142 return false;
143 }
144
145 return true;
146
147 case SCHEME_WITHOUT_AUTHORITY:
148 return false;
149
150 default:
151 NOTREACHED();
152 }
153 }
154
155 } // namespace
156
157 SchemeHostPort::SchemeHostPort() = default;
158
SchemeHostPort(std::string scheme,std::string host,uint16_t port,ConstructPolicy policy)159 SchemeHostPort::SchemeHostPort(std::string scheme,
160 std::string host,
161 uint16_t port,
162 ConstructPolicy policy) {
163 if (ShouldDiscardHostAndPort(scheme)) {
164 host = "";
165 port = 0;
166 }
167
168 if (!IsValidInput(scheme, host, port, policy)) {
169 DCHECK(!IsValid());
170 return;
171 }
172
173 scheme_ = std::move(scheme);
174 host_ = std::move(host);
175 port_ = port;
176 DCHECK(IsValid()) << "Scheme: " << scheme_ << " Host: " << host_
177 << " Port: " << port;
178 }
179
SchemeHostPort(std::string_view scheme,std::string_view host,uint16_t port)180 SchemeHostPort::SchemeHostPort(std::string_view scheme,
181 std::string_view host,
182 uint16_t port)
183 : SchemeHostPort(std::string(scheme),
184 std::string(host),
185 port,
186 ConstructPolicy::CHECK_CANONICALIZATION) {}
187
SchemeHostPort(const GURL & url)188 SchemeHostPort::SchemeHostPort(const GURL& url) {
189 if (!url.is_valid())
190 return;
191
192 std::string_view scheme = url.scheme_piece();
193 std::string_view host = url.host_piece();
194
195 // A valid GURL never returns PORT_INVALID.
196 int port = url.EffectiveIntPort();
197 if (port == PORT_UNSPECIFIED) {
198 port = 0;
199 } else {
200 DCHECK_GE(port, 0);
201 DCHECK_LE(port, 65535);
202 }
203
204 if (ShouldDiscardHostAndPort(scheme)) {
205 host = "";
206 port = 0;
207 }
208
209 if (!IsValidInput(scheme, host, port, ALREADY_CANONICALIZED))
210 return;
211
212 scheme_ = std::string(scheme);
213 host_ = std::string(host);
214 port_ = port;
215 }
216
217 SchemeHostPort::~SchemeHostPort() = default;
218
IsValid() const219 bool SchemeHostPort::IsValid() const {
220 // It suffices to just check |scheme_| for emptiness; the other fields are
221 // never present without it.
222 DCHECK(!scheme_.empty() || host_.empty());
223 DCHECK(!scheme_.empty() || port_ == 0);
224 return !scheme_.empty();
225 }
226
Serialize() const227 std::string SchemeHostPort::Serialize() const {
228 // Null checking for |parsed| in SerializeInternal is probably slower than
229 // just filling it in and discarding it here.
230 url::Parsed parsed;
231 return SerializeInternal(&parsed);
232 }
233
GetURL() const234 GURL SchemeHostPort::GetURL() const {
235 url::Parsed parsed;
236 std::string serialized = SerializeInternal(&parsed);
237
238 if (!IsValid())
239 return GURL(std::move(serialized), parsed, false);
240
241 // SchemeHostPort does not have enough information to determine if an empty
242 // host is valid or not for the given scheme. Force re-parsing.
243 DCHECK(!scheme_.empty());
244 if (host_.empty())
245 return GURL(serialized);
246
247 // If the serialized string is passed to GURL for parsing, it will append an
248 // empty path "/" for standard URLs. Add that here. Note: per RFC 6454 we
249 // cannot do this for normal Origin serialization.
250 DCHECK(!parsed.path.is_valid());
251 if (url::IsUsingStandardCompliantNonSpecialSchemeURLParsing()) {
252 // Append "/" only if the URL is standard. If the flag is enabled,
253 // non-special URLs can have an empty path and GURL doesn't append "/" to
254 // that.
255 if (IsStandardScheme(scheme_)) {
256 parsed.path = Component(serialized.length(), 1);
257 serialized.append("/");
258 }
259 } else {
260 parsed.path = Component(serialized.length(), 1);
261 serialized.append("/");
262 }
263 return GURL(std::move(serialized), parsed, true);
264 }
265
EstimateMemoryUsage() const266 size_t SchemeHostPort::EstimateMemoryUsage() const {
267 return base::trace_event::EstimateMemoryUsage(scheme_) +
268 base::trace_event::EstimateMemoryUsage(host_);
269 }
270
operator <(const SchemeHostPort & other) const271 bool SchemeHostPort::operator<(const SchemeHostPort& other) const {
272 return std::tie(port_, scheme_, host_) <
273 std::tie(other.port_, other.scheme_, other.host_);
274 }
275
SerializeInternal(url::Parsed * parsed) const276 std::string SchemeHostPort::SerializeInternal(url::Parsed* parsed) const {
277 std::string result;
278 if (!IsValid())
279 return result;
280
281 // Reserve enough space for the "normal" case of scheme://host/.
282 result.reserve(scheme_.size() + host_.size() + 4);
283
284 if (!scheme_.empty()) {
285 parsed->scheme = Component(0, scheme_.length());
286 result.append(scheme_);
287 }
288
289 result.append(kStandardSchemeSeparator);
290
291 if (!host_.empty()) {
292 parsed->host = Component(result.length(), host_.length());
293 result.append(host_);
294 }
295
296 // Omit the port component if the port matches with the default port
297 // defined for the scheme, if any.
298 int default_port = DefaultPortForScheme(scheme_);
299 if (default_port == PORT_UNSPECIFIED)
300 return result;
301 if (port_ != default_port) {
302 result.push_back(':');
303 std::string port(base::NumberToString(port_));
304 parsed->port = Component(result.length(), port.length());
305 result.append(std::move(port));
306 }
307
308 return result;
309 }
310
ShouldDiscardHostAndPort(std::string_view scheme)311 bool SchemeHostPort::ShouldDiscardHostAndPort(std::string_view scheme) {
312 return IsAndroidWebViewHackEnabledScheme(scheme) &&
313 IsUsingStandardCompliantNonSpecialSchemeURLParsing();
314 }
315
operator <<(std::ostream & out,const SchemeHostPort & scheme_host_port)316 std::ostream& operator<<(std::ostream& out,
317 const SchemeHostPort& scheme_host_port) {
318 return out << scheme_host_port.Serialize();
319 }
320
321 } // namespace url
322