// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5 #ifndef URL_ORIGIN_H_
6 #define URL_ORIGIN_H_
7
8 #include <stdint.h>
9
10 #include <memory>
11 #include <string>
12
13 #include "base/component_export.h"
14 #include "base/debug/alias.h"
15 #include "base/debug/crash_logging.h"
16 #include "base/gtest_prod_util.h"
17 #include "base/strings/string_piece_forward.h"
18 #include "base/strings/string_util.h"
19 #include "base/trace_event/base_tracing_forward.h"
20 #include "base/unguessable_token.h"
21 #include "build/build_config.h"
22 #include "build/buildflag.h"
23 #include "third_party/abseil-cpp/absl/types/optional.h"
24 #include "url/scheme_host_port.h"
25
26 #if BUILDFLAG(IS_ANDROID)
27 #include <jni.h>
28
29 namespace base {
30 namespace android {
31 template <typename>
32 class ScopedJavaLocalRef;
33 template <typename>
34 class JavaRef;
35 } // namespace android
36 } // namespace base
37 #endif // BUILDFLAG(IS_ANDROID)
38
// Forward declarations of types that this header refers to by name only
// (reference parameters and friend declarations of url::Origin).

class GURL;

namespace blink {
class SecurityOrigin;
class SecurityOriginTest;
class StorageKey;
class StorageKeyTest;
}  // namespace blink

namespace IPC {
template <class P>
struct ParamTraits;
}  // namespace IPC

namespace ipc_fuzzer {
template <class T>
struct FuzzTraits;
}  // namespace ipc_fuzzer

namespace mojo {
template <typename DataViewType, typename T>
struct StructTraits;
struct UrlOriginAdapter;
}  // namespace mojo

namespace net {
class SchemefulSite;
}  // namespace net
67
68 namespace url {
69
// Forward declaration of the mojom data view named in the
// mojo::StructTraits friend declaration of url::Origin.
namespace mojom {
class OriginDataView;
}  // namespace mojom
73
74 // Per https://html.spec.whatwg.org/multipage/origin.html#origin, an origin is
75 // either:
76 // - a tuple origin of (scheme, host, port) as described in RFC 6454.
77 // - an opaque origin with an internal value, and a memory of the tuple origin
78 // from which it was derived.
79 //
80 // TL;DR: If you need to make a security-relevant decision, use 'url::Origin'.
81 // If you only need to extract the bits of a URL which are relevant for a
82 // network connection, use 'url::SchemeHostPort'.
83 //
84 // STL;SDR: If you aren't making actual network connections, use 'url::Origin'.
85 //
86 // This class ought to be used when code needs to determine if two resources
87 // are "same-origin", and when a canonical serialization of an origin is
88 // required. Note that the canonical serialization of an origin *must not* be
89 // used to determine if two resources are same-origin.
90 //
91 // A tuple origin, like 'SchemeHostPort', is composed of a tuple of (scheme,
92 // host, port), but contains a number of additional concepts which make it
93 // appropriate for use as a security boundary and access control mechanism
94 // between contexts. Two tuple origins are same-origin if the tuples are equal.
95 // A tuple origin may also be re-created from its serialization.
96 //
97 // An opaque origin has an internal globally unique identifier. When creating a
98 // new opaque origin from a URL, a fresh globally unique identifier is
99 // generated. However, if an opaque origin is copied or moved, the internal
100 // globally unique identifier is preserved. Two opaque origins are same-origin
101 // iff the globally unique identifiers match. Unlike tuple origins, an opaque
102 // origin cannot be re-created from its serialization, which is always the
103 // string "null".
104 //
105 // IMPORTANT: Since opaque origins always serialize as the string "null", it is
106 // *never* safe to use the serialization for security checks!
107 //
108 // A tuple origin and an opaque origin are never same-origin.
109 //
110 // There are a few subtleties to note:
111 //
112 // * A default constructed Origin is opaque, with no precursor origin.
113 //
114 // * Invalid and non-standard GURLs are parsed as opaque origins. This includes
115 // non-hierarchical URLs like 'data:text/html,...' and 'javascript:alert(1)'.
116 //
117 // * GURLs with schemes of 'filesystem' or 'blob' parse the origin out of the
118 // internals of the URL. That is, 'filesystem:https://example.com/temporary/f'
119 // is parsed as ('https', 'example.com', 443).
120 //
121 // * GURLs with a 'file' scheme are tricky. They are parsed as ('file', '', 0),
122 // but their behavior may differ from embedder to embedder.
123 // TODO(dcheng): This behavior is not consistent with Blink's notion of file
124 // URLs, which always creates an opaque origin.
125 //
126 // * The host component of an IPv6 address includes brackets, just like the URL
127 // representation.
128 //
129 // * Constructing origins from GURLs (or from SchemeHostPort) is typically a red
130 // flag (this is true for `url::Origin::Create` but also to some extent for
131 // `url::Origin::Resolve`). See docs/security/origin-vs-url.md for more.
132 //
133 // * To answer the question "Are |this| and |that| "same-origin" with each
134 // other?", use |Origin::IsSameOriginWith|:
135 //
136 // if (this.IsSameOriginWith(that)) {
137 // // Amazingness goes here.
138 // }
COMPONENT_EXPORT(URL)139 class COMPONENT_EXPORT(URL) Origin {
140 public:
141 // Creates an opaque Origin with a nonce that is different from all previously
142 // existing origins.
143 Origin();
144
145 // WARNING: Converting an URL into an Origin is usually a red flag. See
146 // //docs/security/origin-vs-url.md for more details. Some discussion about
147 // deprecating the Create method can be found in https://crbug.com/1270878.
148 //
149 // Creates an Origin from `url`, as described at
150 // https://url.spec.whatwg.org/#origin, with the following additions:
151 // 1. If `url` is invalid or non-standard, an opaque Origin is constructed.
152 // 2. 'filesystem' URLs behave as 'blob' URLs (that is, the origin is parsed
153 // out of everything in the URL which follows the scheme).
154 // 3. 'file' URLs all parse as ("file", "", 0).
155 //
156 // WARNING: `url::Origin::Create(url)` can give unexpected results if:
157 // 1) `url` is "about:blank", or "about:srcdoc" (returning unique, opaque
158 // origin rather than the real origin of the frame)
159 // 2) `url` comes from a sandboxed frame (potentially returning a non-opaque
160 // origin, when an opaque one is needed; see also
161 // https://www.html5rocks.com/en/tutorials/security/sandboxed-iframes/)
162 // 3) Wrong `url` is used - e.g. in some navigations `base_url_for_data_url`
163 // might need to be used instead of relying on
164 // `content::NavigationHandle::GetURL`.
165 //
166 // WARNING: The returned Origin may have a different scheme and host from
167 // `url` (e.g. in case of blob URLs - see OriginTest.ConstructFromGURL).
168 //
169 // WARNING: data: URLs will be correctly be translated into opaque origins,
170 // but the precursor origin will be lost (unlike with `url::Origin::Resolve`).
171 static Origin Create(const GURL& url);
172
173 // Creates an Origin for the resource `url` as if it were requested
174 // from the context of `base_origin`. If `url` is standard
175 // (in the sense that it embeds a complete origin, like http/https),
176 // this returns the same value as would Create().
177 //
178 // If `url` is "about:blank" or "about:srcdoc", this returns a copy of
179 // `base_origin`.
180 //
181 // Otherwise, returns a new opaque origin derived from `base_origin`.
182 // In this case, the resulting opaque origin will inherit the tuple
183 // (or precursor tuple) of `base_origin`, but will not be same origin
184 // with `base_origin`, even if `base_origin` is already opaque.
185 static Origin Resolve(const GURL& url, const Origin& base_origin);
186
187 // Copyable and movable.
188 Origin(const Origin&);
189 Origin& operator=(const Origin&);
190 Origin(Origin&&) noexcept;
191 Origin& operator=(Origin&&) noexcept;
192
193 // Creates an Origin from a |scheme|, |host|, and |port|. All the parameters
194 // must be valid and canonicalized. Returns nullopt if any parameter is not
195 // canonical, or if all the parameters are empty.
196 //
197 // This constructor should be used in order to pass 'Origin' objects back and
198 // forth over IPC (as transitioning through GURL would risk potentially
199 // dangerous recanonicalization); other potential callers should prefer the
200 // 'GURL'-based constructor.
201 static absl::optional<Origin> UnsafelyCreateTupleOriginWithoutNormalization(
202 base::StringPiece scheme,
203 base::StringPiece host,
204 uint16_t port);
205
206 // Creates an origin without sanity checking that the host is canonicalized.
207 // This should only be used when converting between already normalized types,
208 // and should NOT be used for IPC. Method takes std::strings for use with move
209 // operators to avoid copies.
210 static Origin CreateFromNormalizedTuple(std::string scheme,
211 std::string host,
212 uint16_t port);
213
214 ~Origin();
215
216 // For opaque origins, these return ("", "", 0).
217 const std::string& scheme() const {
218 return !opaque() ? tuple_.scheme() : base::EmptyString();
219 }
220 const std::string& host() const {
221 return !opaque() ? tuple_.host() : base::EmptyString();
222 }
223 uint16_t port() const { return !opaque() ? tuple_.port() : 0; }
224
225 bool opaque() const { return nonce_.has_value(); }
226
227 // An ASCII serialization of the Origin as per Section 6.2 of RFC 6454, with
228 // the addition that all Origins with a 'file' scheme serialize to "file://".
229 std::string Serialize() const;
230
231 // Two non-opaque Origins are "same-origin" if their schemes, hosts, and ports
232 // are exact matches. Two opaque origins are same-origin only if their
233 // internal nonce values match. A non-opaque origin is never same-origin with
234 // an opaque origin.
235 bool IsSameOriginWith(const Origin& other) const;
236 bool operator==(const Origin& other) const { return IsSameOriginWith(other); }
237 bool operator!=(const Origin& other) const {
238 return !IsSameOriginWith(other);
239 }
240
241 // Non-opaque origin is "same-origin" with `url` if their schemes, hosts, and
242 // ports are exact matches. Opaque origin is never "same-origin" with any
243 // `url`. about:blank, about:srcdoc, and invalid GURLs are never
244 // "same-origin" with any origin. This method is a shorthand for
245 // `origin.IsSameOriginWith(url::Origin::Create(url))`.
246 //
247 // See also CanBeDerivedFrom.
248 bool IsSameOriginWith(const GURL& url) const;
249
250 // This method returns true for any |url| which if navigated to could result
251 // in an origin compatible with |this|.
252 bool CanBeDerivedFrom(const GURL& url) const;
253
254 // Get the scheme, host, and port from which this origin derives. For
255 // a tuple Origin, this gives the same values as calling scheme(), host()
256 // and port(). For an opaque Origin that was created by calling
257 // Origin::DeriveNewOpaqueOrigin() on a precursor or Origin::Resolve(),
258 // this returns the tuple inherited from the precursor.
259 //
260 // If this Origin is opaque and was created via the default constructor or
261 // Origin::Create(), the precursor origin is unknown.
262 //
263 // Use with great caution: opaque origins should generally not inherit
264 // privileges from the origins they derive from. However, in some cases
265 // (such as restrictions on process placement, or determining the http lock
266 // icon) this information may be relevant to ensure that entering an
267 // opaque origin does not grant privileges initially denied to the original
268 // non-opaque origin.
269 //
270 // This method has a deliberately obnoxious name to prompt caution in its use.
271 const SchemeHostPort& GetTupleOrPrecursorTupleIfOpaque() const {
272 return tuple_;
273 }
274
275 // Efficiently returns what GURL(Serialize()) would without re-parsing the
276 // URL. This can be used for the (rare) times a GURL representation is needed
277 // for an Origin.
278 // Note: The returned URL will not necessarily be serialized to the same value
279 // as the Origin would. The GURL will have an added "/" path for Origins with
280 // valid SchemeHostPorts and file Origins.
281 //
282 // Try not to use this method under normal circumstances, as it loses type
283 // information. Downstream consumers can mistake the returned GURL with a full
284 // URL (e.g. with a path component).
285 GURL GetURL() const;
286
287 // Same as GURL::DomainIs. If |this| origin is opaque, then returns false.
288 bool DomainIs(base::StringPiece canonical_domain) const;
289
290 // Allows Origin to be used as a key in STL (for example, a std::set or
291 // std::map).
292 bool operator<(const Origin& other) const;
293
294 // Creates a new opaque origin that is guaranteed to be cross-origin to all
295 // currently existing origins. An origin created by this method retains its
296 // identity across copies. Copies are guaranteed to be same-origin to each
297 // other, e.g.
298 //
299 // url::Origin page = Origin::Create(GURL("http://example.com"))
300 // url::Origin a = page.DeriveNewOpaqueOrigin();
301 // url::Origin b = page.DeriveNewOpaqueOrigin();
302 // url::Origin c = a;
303 // url::Origin d = b;
304 //
305 // |a| and |c| are same-origin, since |c| was copied from |a|. |b| and |d| are
306 // same-origin as well, since |d| was copied from |b|. All other combinations
307 // of origins are considered cross-origin, e.g. |a| is cross-origin to |b| and
308 // |d|, |b| is cross-origin to |a| and |c|, |c| is cross-origin to |b| and
309 // |d|, and |d| is cross-origin to |a| and |c|.
310 Origin DeriveNewOpaqueOrigin() const;
311
312 // Creates a string representation of the object that can be used for logging
313 // and debugging. It serializes the internal state, such as the nonce value
314 // and precursor information.
315 std::string GetDebugString(bool include_nonce = true) const;
316
317 #if BUILDFLAG(IS_ANDROID)
318 base::android::ScopedJavaLocalRef<jobject> CreateJavaObject() const;
319 static Origin FromJavaObject(
320 const base::android::JavaRef<jobject>& java_origin);
321 static jlong CreateNative(JNIEnv* env,
322 const base::android::JavaRef<jstring>& java_scheme,
323 const base::android::JavaRef<jstring>& java_host,
324 uint16_t port,
325 bool is_opaque,
326 uint64_t tokenHighBits,
327 uint64_t tokenLowBits);
328 #endif // BUILDFLAG(IS_ANDROID)
329
330 void WriteIntoTrace(perfetto::TracedValue context) const;
331
332 private:
333 friend class blink::SecurityOrigin;
334 friend class blink::SecurityOriginTest;
335 friend class blink::StorageKey;
336 // SchemefulSite needs access to the serialization/deserialization logic which
337 // includes the nonce.
338 friend class net::SchemefulSite;
339 friend class OriginTest;
340 friend struct mojo::UrlOriginAdapter;
341 friend struct ipc_fuzzer::FuzzTraits<Origin>;
342 friend struct mojo::StructTraits<url::mojom::OriginDataView, url::Origin>;
343 friend IPC::ParamTraits<url::Origin>;
344 friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
345 const Origin& origin);
346 friend class blink::StorageKeyTest;
347
348 // Origin::Nonce is a wrapper around base::UnguessableToken that generates
349 // the random value only when the value is first accessed. The lazy generation
350 // allows Origin to be default-constructed quickly, without spending time
351 // in random number generation.
352 //
353 // TODO(nick): Should this optimization move into UnguessableToken, once it no
354 // longer treats the Null case specially?
355 class COMPONENT_EXPORT(URL) Nonce {
356 public:
357 // Creates a nonce to hold a newly-generated UnguessableToken. The actual
358 // token value will be generated lazily.
359 Nonce();
360
361 // Creates a nonce to hold an already-generated UnguessableToken value. This
362 // constructor should only be used for IPC serialization and testing --
363 // regular code should never need to touch the UnguessableTokens directly,
364 // and the default constructor is faster.
365 explicit Nonce(const base::UnguessableToken& token);
366
367 // Accessor, which lazily initializes the underlying |token_| member.
368 const base::UnguessableToken& token() const;
369
370 // Do not use in cases where lazy initialization is expected! This
371 // accessor does not initialize the |token_| member.
372 const base::UnguessableToken& raw_token() const;
373
374 // Copyable and movable. Copying a Nonce triggers lazy-initialization,
375 // moving it does not.
376 Nonce(const Nonce&);
377 Nonce& operator=(const Nonce&);
378 Nonce(Nonce&&) noexcept;
379 Nonce& operator=(Nonce&&) noexcept;
380
381 // Note that operator<, used by maps type containers, will trigger |token_|
382 // lazy-initialization. Equality comparisons do not.
383 bool operator<(const Nonce& other) const;
384 bool operator==(const Nonce& other) const;
385 bool operator!=(const Nonce& other) const;
386
387 private:
388 friend class OriginTest;
389
390 // mutable to support lazy generation.
391 mutable base::UnguessableToken token_;
392 };
393
394 // This needs to be friended within Origin as well, since Nonce is a private
395 // nested class of Origin.
396 friend COMPONENT_EXPORT(URL) std::ostream& operator<<(std::ostream& out,
397 const Nonce& nonce);
398
399 // Creates an origin without sanity checking that the host is canonicalized.
400 // This should only be used when converting between already normalized types,
401 // and should NOT be used for IPC. Method takes std::strings for use with move
402 // operators to avoid copies.
403 static Origin CreateOpaqueFromNormalizedPrecursorTuple(
404 std::string precursor_scheme,
405 std::string precursor_host,
406 uint16_t precursor_port,
407 const Nonce& nonce);
408
409 // Creates an opaque Origin with the identity given by |nonce|, and an
410 // optional precursor origin given by |precursor_scheme|, |precursor_host| and
411 // |precursor_port|. Returns nullopt if any parameter is not canonical. When
412 // the precursor is unknown, the precursor parameters should be ("", "", 0).
413 //
414 // This factory method should be used in order to pass opaque Origin objects
415 // back and forth over IPC (as transitioning through GURL would risk
416 // potentially dangerous recanonicalization).
417 static absl::optional<Origin> UnsafelyCreateOpaqueOriginWithoutNormalization(
418 base::StringPiece precursor_scheme,
419 base::StringPiece precursor_host,
420 uint16_t precursor_port,
421 const Nonce& nonce);
422
423 // Constructs a non-opaque tuple origin. |tuple| must be valid.
424 explicit Origin(SchemeHostPort tuple);
425
426 // Constructs an opaque origin derived from the |precursor| tuple, with the
427 // given |nonce|.
428 Origin(const Nonce& nonce, SchemeHostPort precursor);
429
430 // Get the nonce associated with this origin, if it is opaque, or nullptr
431 // otherwise. This should be used only when trying to send an Origin across an
432 // IPC pipe.
433 const base::UnguessableToken* GetNonceForSerialization() const;
434
435 // Serializes this Origin, including its nonce if it is opaque. If an opaque
436 // origin's |tuple_| is invalid nullopt is returned. If the nonce is not
437 // initialized, a nonce of 0 is used. Use of this method should be limited as
438 // an opaque origin will never be matchable in future browser sessions.
439 absl::optional<std::string> SerializeWithNonce() const;
440
441 // Like SerializeWithNonce(), but forces |nonce_| to be initialized prior to
442 // serializing.
443 absl::optional<std::string> SerializeWithNonceAndInitIfNeeded();
444
445 absl::optional<std::string> SerializeWithNonceImpl() const;
446
447 // Deserializes an origin from |ToValueWithNonce|. Returns nullopt if the
448 // value was invalid in any way.
449 static absl::optional<Origin> Deserialize(const std::string& value);
450
451 // The tuple is used for both tuple origins (e.g. https://example.com:80), as
452 // well as for opaque origins, where it tracks the tuple origin from which
453 // the opaque origin was initially derived (we call this the "precursor"
454 // origin).
455 SchemeHostPort tuple_;
456
457 // The nonce is used for maintaining identity of an opaque origin. This
458 // nonce is preserved when an opaque origin is copied or moved. An Origin
459 // is considered opaque if and only if |nonce_| holds a value.
460 absl::optional<Nonce> nonce_;
461 };
462
463 // Pretty-printers for logging. These expose the internal state of the nonce.
464 COMPONENT_EXPORT(URL)
465 std::ostream& operator<<(std::ostream& out, const Origin& origin);
466 COMPONENT_EXPORT(URL)
467 std::ostream& operator<<(std::ostream& out, const Origin::Nonce& origin);
468
469 COMPONENT_EXPORT(URL) bool IsSameOriginWith(const GURL& a, const GURL& b);
470
// DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) copies `origin` into a new
// stack-allocated variable named `<var_name>`. This helps ensure that the
// value of `origin` gets preserved in crash dumps.
//
// What is aliased is the string returned by `(origin).Serialize()`, handed to
// DEBUG_ALIAS_FOR_CSTR together with a size argument of 128. Because opaque
// origins always serialize as "null", neither the nonce nor the precursor of
// an opaque origin is captured by this macro.
#define DEBUG_ALIAS_FOR_ORIGIN(var_name, origin) \
  DEBUG_ALIAS_FOR_CSTR(var_name, (origin).Serialize().c_str(), 128)
476
477 namespace debug {
478
479 class COMPONENT_EXPORT(URL) ScopedOriginCrashKey {
480 public:
481 ScopedOriginCrashKey(base::debug::CrashKeyString* crash_key,
482 const url::Origin* value);
483 ~ScopedOriginCrashKey();
484
485 ScopedOriginCrashKey(const ScopedOriginCrashKey&) = delete;
486 ScopedOriginCrashKey& operator=(const ScopedOriginCrashKey&) = delete;
487
488 private:
489 base::debug::ScopedCrashKeyString scoped_string_value_;
490 };
491
492 } // namespace debug
493
494 } // namespace url
495
496 #endif // URL_ORIGIN_H_
497