1 // Copyright 2020 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_BASE_SCHEMEFUL_SITE_H_ 6 #define NET_BASE_SCHEMEFUL_SITE_H_ 7 8 #include <optional> 9 #include <ostream> 10 #include <string> 11 #include <string_view> 12 13 #include "base/gtest_prod_util.h" 14 #include "base/types/pass_key.h" 15 #include "net/base/net_export.h" 16 #include "url/origin.h" 17 18 class GURL; 19 20 namespace blink { 21 class BlinkSchemefulSite; 22 class StorageKey; 23 } // namespace blink 24 25 namespace IPC { 26 template <class P> 27 struct ParamTraits; 28 } // namespace IPC 29 30 namespace network::mojom { 31 class SchemefulSiteDataView; 32 } // namespace network::mojom 33 34 namespace mojo { 35 template <typename DataViewType, typename T> 36 struct StructTraits; 37 } // namespace mojo 38 39 namespace net { 40 41 class NetworkAnonymizationKey; 42 class SiteForCookies; 43 44 // Class which represents a scheme and etld+1 for an origin, as specified by 45 // https://html.spec.whatwg.org/multipage/origin.html#obtain-a-site. 46 // 47 // A SchemefulSite is obtained from an input origin by normalizing, such that: 48 // 1. Opaque origins have distinct SchemefulSites. 49 // 2. Origins whose schemes have network hosts have the same SchemefulSite iff 50 // they share a scheme, and share a hostname or registrable domain. Origins 51 // whose schemes have network hosts include http, https, ws, wss, file, etc. 52 // 3. Origins whose schemes do not have a network host have the same 53 // SchemefulSite iff they share a scheme and host. 54 // 4. Origins which differ only by port have the same SchemefulSite. 55 // 5. Websocket origins cannot have a SchemefulSite (they trigger a DCHECK). 56 // 57 // Note that blink::BlinkSchemefulSite mirrors this class and needs to be kept 58 // in sync with any data member changes. 59 class NET_EXPORT SchemefulSite { 60 public: 61 SchemefulSite() = default; 62 63 // The passed `origin` may not match the resulting internal representation in 64 // certain circumstances. See the comment, below, on the `site_as_origin_` 65 // member. 66 explicit SchemefulSite(const url::Origin& origin); 67 68 // Using the origin constructor is preferred as this is less efficient. 69 // Should only be used if the origin for a given GURL is not readily 70 // available. 71 explicit SchemefulSite(const GURL& url); 72 73 SchemefulSite(const SchemefulSite& other); 74 SchemefulSite(SchemefulSite&& other) noexcept; 75 76 SchemefulSite& operator=(const SchemefulSite& other); 77 SchemefulSite& operator=(SchemefulSite&& other) noexcept; 78 79 // Tries to construct an instance from a (potentially untrusted) value of the 80 // internal `site_as_origin_` that got received over an RPC. 81 // 82 // Returns whether successful or not. Doesn't touch |*out| if false is 83 // returned. This returning |true| does not mean that whoever sent the values 84 // did not lie, merely that they are well-formed. 85 static bool FromWire(const url::Origin& site_as_origin, SchemefulSite* out); 86 87 // Creates a SchemefulSite iff the passed-in origin has a registerable domain. 88 static std::optional<SchemefulSite> CreateIfHasRegisterableDomain( 89 const url::Origin&); 90 91 // If the scheme is ws or wss, it is converted to http or https, respectively. 92 // Has no effect on SchemefulSites with any other schemes. 93 // 94 // See Step 1 of algorithm "establish a WebSocket connection" in 95 // https://fetch.spec.whatwg.org/#websocket-opening-handshake. 96 void ConvertWebSocketToHttp(); 97 98 // Deserializes a string obtained from `Serialize()` to a `SchemefulSite`. 99 // Returns an opaque `SchemefulSite` if the value was invalid in any way. 100 static SchemefulSite Deserialize(std::string_view value); 101 102 // Returns a serialized version of `site_as_origin_`. If the underlying origin 103 // is invalid, returns an empty string. If serialization of opaque origins 104 // with their associated nonce is necessary, see `SerializeWithNonce()`. 105 std::string Serialize() const; 106 107 // Serializes `site_as_origin_` in cases when it has a 'file' scheme but 108 // we want to preserve the Origin's host. 109 // This was added to serialize cookie partition keys, which may contain 110 // file origins with a host. 111 std::string SerializeFileSiteWithHost() const; 112 113 std::string GetDebugString() const; 114 115 // Gets the underlying site as a GURL. If the internal Origin is opaque, 116 // returns an empty GURL. 117 GURL GetURL() const; 118 119 // Deserializes a string obtained from `SerializeWithNonce()` to a 120 // `SchemefulSite`. Returns nullopt if the value was invalid in any way. 121 static std::optional<SchemefulSite> DeserializeWithNonce( 122 base::PassKey<NetworkAnonymizationKey>, 123 std::string_view value); 124 125 // Returns a serialized version of `site_as_origin_`. For an opaque 126 // `site_as_origin_`, this serializes with the nonce. See 127 // `url::origin::SerializeWithNonce()` for usage information. 128 std::optional<std::string> SerializeWithNonce( 129 base::PassKey<NetworkAnonymizationKey>); 130 opaque()131 bool opaque() const { return site_as_origin_.opaque(); } 132 has_registrable_domain_or_host()133 bool has_registrable_domain_or_host() const { 134 return !registrable_domain_or_host().empty(); 135 } 136 137 // Testing only function which allows tests to access the underlying 138 // `site_as_origin_` in order to verify behavior. 139 const url::Origin& GetInternalOriginForTesting() const; 140 141 // Testing-only function which allows access to the private 142 // `registrable_domain_or_host` method. registrable_domain_or_host_for_testing()143 std::string registrable_domain_or_host_for_testing() const { 144 return registrable_domain_or_host(); 145 } 146 147 // Estimates dynamic memory usage. 148 // See base/trace_event/memory_usage_estimator.h for more info. 149 size_t EstimateMemoryUsage() const; 150 151 bool operator==(const SchemefulSite& other) const; 152 153 bool operator!=(const SchemefulSite& other) const; 154 155 bool operator<(const SchemefulSite& other) const; 156 157 private: 158 // IPC serialization code needs to access internal origin. 159 friend struct mojo::StructTraits<network::mojom::SchemefulSiteDataView, 160 SchemefulSite>; 161 friend struct IPC::ParamTraits<net::SchemefulSite>; 162 163 friend class blink::BlinkSchemefulSite; 164 165 // Create SiteForCookies from SchemefulSite needs to access internal origin, 166 // and SiteForCookies needs to access private method SchemelesslyEqual. 167 friend class SiteForCookies; 168 169 // Needed to create a bogus origin from a site. 170 // TODO(crbug.com/40157262): Give IsolationInfos empty origins instead, 171 // in this case, and unfriend IsolationInfo. 172 friend class IsolationInfo; 173 174 // Needed to create a bogus origin from a site. 175 friend class URLRequest; 176 177 // Needed for access to nonce for serialization. 178 friend class blink::StorageKey; 179 180 FRIEND_TEST_ALL_PREFIXES(SchemefulSiteTest, OpaqueSerialization); 181 FRIEND_TEST_ALL_PREFIXES(SchemefulSiteTest, InternalValue); 182 183 struct ObtainASiteResult; 184 185 static ObtainASiteResult ObtainASite(const url::Origin&); 186 187 explicit SchemefulSite(ObtainASiteResult, const url::Origin&); 188 189 // Deserializes a string obtained from `SerializeWithNonce()` to a 190 // `SchemefulSite`. Returns nullopt if the value was invalid in any way. 191 static std::optional<SchemefulSite> DeserializeWithNonce( 192 std::string_view value); 193 194 // Returns a serialized version of `site_as_origin_`. For an opaque 195 // `site_as_origin_`, this serializes with the nonce. See 196 // `url::origin::SerializeWithNonce()` for usage information. 197 std::optional<std::string> SerializeWithNonce(); 198 199 // Returns whether `this` and `other` share a host or registrable domain. 200 // Should NOT be used to check equality or equivalence. This is only used 201 // for legacy same-site cookie logic that does not check schemes. Private to 202 // restrict usage. 203 bool SchemelesslyEqual(const SchemefulSite& other) const; 204 205 // Returns the host of the underlying `origin`, which will usually be the 206 // registrable domain. This is private because if it were public, it would 207 // trivially allow circumvention of the "Schemeful"-ness of this class. 208 std::string registrable_domain_or_host() const { 209 return site_as_origin_.host(); 210 } 211 212 // This should not be used casually, it's an opaque Origin or an scheme+eTLD+1 213 // packed into an Origin. If you extract this value SchemefulSite is not 214 // responsible for any unexpected friction you might encounter. 215 const url::Origin& internal_value() const { return site_as_origin_; } 216 217 // Origin which stores the result of running the steps documented at 218 // https://html.spec.whatwg.org/multipage/origin.html#obtain-a-site. 219 // This is not an arbitrary origin. It must either be an opaque origin, or a 220 // scheme + eTLD+1 + default port. 221 // 222 // The `origin` passed into the SchemefulSite(const url::Origin&) constructor 223 // might not match this internal representation used by this class to track 224 // the scheme and eTLD+1 representing a schemeful site. This may be the case 225 // if, e.g., the passed `origin` has an eTLD+1 that is not equal to its 226 // hostname, or if the port number is not the default port for its scheme. 227 // 228 // In general, this `site_as_origin_` used for the internal representation 229 // should NOT be used directly by SchemefulSite consumers. 230 url::Origin site_as_origin_; 231 }; 232 233 // Provided to allow gtest to create more helpful error messages, instead of 234 // printing hex. 235 // 236 // Also used so that SchemefulSites can be the arguments of DCHECK_EQ. 237 NET_EXPORT std::ostream& operator<<(std::ostream& os, const SchemefulSite& ss); 238 239 } // namespace net 240 241 #endif // NET_BASE_SCHEMEFUL_SITE_H_ 242