// Copyright 2017 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef NET_REPORTING_REPORTING_CACHE_H_
#define NET_REPORTING_REPORTING_CACHE_H_

#include <memory>
#include <set>
#include <string>
#include <vector>

#include "base/containers/flat_map.h"
#include "base/containers/flat_set.h"
#include "base/functional/callback.h"
#include "base/time/time.h"
#include "base/unguessable_token.h"
#include "base/values.h"
#include "net/base/net_export.h"
#include "net/reporting/reporting_endpoint.h"
#include "net/reporting/reporting_header_parser.h"
#include "net/reporting/reporting_report.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "url/gurl.h"
#include "url/origin.h"

namespace net {

class ReportingContext;
class IsolationInfo;

// The cache holds undelivered reports and clients (per-origin endpoint
// configurations) in memory. (It is not responsible for persisting them.)
//
// Each Reporting "endpoint" represents a report collector at some specified
// URL. Endpoints are organized into named "endpoint groups", each of which
// additionally specifies some properties such as expiration time.
// A "client" represents the entire endpoint configuration set by an origin via
// a Report-To header, which consists of multiple endpoint groups, each of which
// consists of multiple endpoints. An endpoint group is keyed by its name. An
// endpoint is unkeyed except by the client and group structure tree above it.
//
// The cache implementation corresponds roughly to the "Reporting cache"
// described in the spec, except that endpoints and clients are stored in a more
// structurally-convenient way, and endpoint failures/retry-after are tracked in
// ReportingEndpointManager.
//
// The cache implementation has the notion of "pending" reports. These are
// reports that are part of an active delivery attempt, so they won't be
// actually deallocated. Any attempt to remove a pending report will mark it
// "doomed", which will cause it to be deallocated once it is no longer pending.
//
// This class is an abstract interface: every operation below is pure virtual.
class NET_EXPORT ReportingCache {
 public:
  class PersistentReportingStore;

  // Factory for the cache; the caller owns the returned object.
  // NOTE(review): the concrete implementation type, and how |context| is used
  // by it, are not visible in this header.
  static std::unique_ptr<ReportingCache> Create(ReportingContext* context);

  virtual ~ReportingCache();

  // Adds a report to the cache.
  //
  // |reporting_source| and |network_anonymization_key| will be used when the
  // report is delivered, to determine which endpoints are eligible to receive
  // this report, and which other reports this report can be batched with.
  //
  // All other parameters correspond to the desired values for the relevant
  // fields in ReportingReport.
  virtual void AddReport(
      const absl::optional<base::UnguessableToken>& reporting_source,
      const NetworkAnonymizationKey& network_anonymization_key,
      const GURL& url,
      const std::string& user_agent,
      const std::string& group_name,
      const std::string& type,
      base::Value::Dict body,
      int depth,
      base::TimeTicks queued,
      int attempts) = 0;

  // Gets all reports in the cache. The returned pointers are valid as long as
  // either no calls to |RemoveReports| have happened or the reports have been
  // marked pending (which the |GetReportsToDeliver*()| methods below do).
  // Does not return doomed reports (pending reports for which removal has been
  // requested).
  //
  // (Clears any existing data in |*reports_out|.)
  virtual void GetReports(
      std::vector<const ReportingReport*>* reports_out) const = 0;

  // Gets all reports in the cache, including pending and doomed reports, as a
  // base::Value.
  virtual base::Value GetReportsAsValue() const = 0;

  // Gets all reports in the cache that aren't pending or doomed (i.e. that are
  // eligible for delivery), and marks returned reports as pending in
  // preparation for a delivery attempt. The returned pointers are valid as long
  // as the reports are still pending.
  virtual std::vector<const ReportingReport*> GetReportsToDeliver() = 0;

  // Gets all reports in the cache which are eligible for delivery, which were
  // queued for a single `reporting_source`, and marks returned reports as
  // pending in preparation for a delivery attempt. The returned pointers are
  // valid as long as the reports are still pending. This method is used when a
  // reporting source is being destroyed, to trigger delivery of any remaining
  // outstanding reports.
  virtual std::vector<const ReportingReport*> GetReportsToDeliverForSource(
      const base::UnguessableToken& reporting_source) = 0;

  // Unmarks a set of reports as pending. |reports| must be previously marked as
  // pending.
  virtual void ClearReportsPending(
      const std::vector<const ReportingReport*>& reports) = 0;

  // Increments |attempts| on a set of reports.
  virtual void IncrementReportsAttempts(
      const std::vector<const ReportingReport*>& reports) = 0;

  // Records that we attempted (and possibly succeeded at) delivering
  // |reports_delivered| reports to the specified endpoint.
  virtual void IncrementEndpointDeliveries(
      const ReportingEndpointGroupKey& group_key,
      const GURL& url,
      int reports_delivered,
      bool successful) = 0;

  // Marks a `reporting_source` as expired, when the source (document or
  // worker) has been destroyed. The endpoint configuration for the source will
  // be removed by the garbage collector once all outstanding reports have been
  // delivered or expired.
  virtual void SetExpiredSource(
      const base::UnguessableToken& reporting_source) = 0;

  // Gets the current set of expired reporting sources.
  virtual const base::flat_set<base::UnguessableToken>& GetExpiredSources()
      const = 0;

  // Removes a set of reports. Any reports that are pending will not be removed
  // immediately, but rather marked doomed and removed once they are no longer
  // pending.
  //
  // NOTE(review): the two-argument overload additionally takes
  // |delivery_success|; presumably it records whether the removal follows a
  // successful delivery, and the one-argument overload's implied value is not
  // visible here — confirm against the implementation.
  virtual void RemoveReports(
      const std::vector<const ReportingReport*>& reports) = 0;
  virtual void RemoveReports(const std::vector<const ReportingReport*>& reports,
                             bool delivery_success) = 0;

  // Removes all reports. Like |RemoveReports()|, pending reports are doomed
  // until no longer pending.
  virtual void RemoveAllReports() = 0;

  // Gets the count of reports in the cache, *including* doomed reports.
  //
  // Needed to ensure that doomed reports are eventually deleted, since no
  // method provides a view of *every* report in the cache, just non-doomed
  // ones.
  virtual size_t GetFullReportCountForTesting() const = 0;

  // Gets the count of reports in the cache with a specific `status`.
  virtual size_t GetReportCountWithStatusForTesting(
      ReportingReport::Status status) const = 0;

  // Test-only accessors for the "pending" and "doomed" states of |report|
  // (both states are described in the class comment above).
  virtual bool IsReportPendingForTesting(
      const ReportingReport* report) const = 0;

  virtual bool IsReportDoomedForTesting(
      const ReportingReport* report) const = 0;

  // Adds a new client to the cache for |origin|, or updates the existing one
  // to match the new header. All values are assumed to be valid as they have
  // passed through the ReportingHeaderParser.
  virtual void OnParsedHeader(
      const NetworkAnonymizationKey& network_anonymization_key,
      const url::Origin& origin,
      std::vector<ReportingEndpointGroup> parsed_header) = 0;

  // Adds named endpoints for |reporting_source| to the cache, based on the
  // received Reporting-Endpoints header.
  // |reporting_source| is the token identifying the document or worker with
  // which this header was received, and may not be empty.
  // |isolation_info| is the appropriate network isolation info struct for that
  // source, and is used for determining credentials to send with reports.
  virtual void OnParsedReportingEndpointsHeader(
      const base::UnguessableToken& reporting_source,
      const IsolationInfo& isolation_info,
      std::vector<ReportingEndpoint> parsed_header) = 0;

  // Gets all the origins of clients in the cache.
  virtual std::set<url::Origin> GetAllOrigins() const = 0;

  // Remove client for the given (NIK, origin) pair, if it exists in the cache.
  // All endpoint groups and endpoints for that client are also removed.
  virtual void RemoveClient(
      const NetworkAnonymizationKey& network_anonymization_key,
      const url::Origin& origin) = 0;

  // Remove all clients for the given |origin|, if any exist in the cache.
  // All endpoint groups and endpoints for |origin| are also removed.
  virtual void RemoveClientsForOrigin(const url::Origin& origin) = 0;

  // Remove all clients, groups, and endpoints from the cache.
  virtual void RemoveAllClients() = 0;

  // Remove the endpoint group matching |group_key|, and remove
  // all endpoints for that group. May cause the client it was associated with
  // to be deleted if it becomes empty.
  virtual void RemoveEndpointGroup(
      const ReportingEndpointGroupKey& group_key) = 0;

  // Remove all endpoints with |url|, regardless of origin or group. Used
  // when a delivery returns 410 Gone. May cause deletion of groups/clients if
  // they become empty.
  virtual void RemoveEndpointsForUrl(const GURL& url) = 0;

  // Remove `reporting_source` from the cache, including any configured
  // endpoints. There should be no non-doomed reports in the cache for
  // `reporting_source` when this is called.
  virtual void RemoveSourceAndEndpoints(
      const base::UnguessableToken& reporting_source) = 0;

  // Insert endpoints and endpoint groups that have been loaded from the store.
  //
  // You must only call this method if context.store() was non-null when you
  // constructed the cache and persist_clients_across_restarts in your
  // ReportingPolicy is true.
  virtual void AddClientsLoadedFromStore(
      std::vector<ReportingEndpoint> loaded_endpoints,
      std::vector<CachedReportingEndpointGroup> loaded_endpoint_groups) = 0;

  // Gets endpoints that apply to a delivery for |origin| and |group|.
  // NOTE(review): |origin| and |group| are not parameters of this method;
  // presumably they refer to the origin and group name carried by |group_key|
  // — confirm against ReportingEndpointGroupKey.
  //
  // First checks for |group| in a client exactly matching |origin|.
  // If none exists, then checks for |group| in clients for superdomains
  // of |origin| which have include_subdomains enabled, returning only the
  // endpoints for the most specific applicable parent origin of |origin|. If
  // there are multiple origins with that group within the most specific
  // applicable superdomain, gets endpoints for that group from only one of
  // them. The group must not be expired.
  //
  // For example, given the origin https://foo.bar.baz.com/, the cache
  // would prioritize returning each potential match below over the ones below
  // it, for groups with name |group| with include_subdomains enabled:
  // 1. https://foo.bar.baz.com/ (exact origin match)
  // 2. https://foo.bar.baz.com:444/ (technically, a superdomain)
  // 3. https://bar.baz.com/, https://bar.baz.com:444/, etc. (superdomain)
  // 4. https://baz.com/, https://baz.com:444/, etc. (superdomain)
  // If both https://bar.baz.com/ and https://bar.baz.com:444/ had a group with
  // name |group| with include_subdomains enabled, this method would return
  // endpoints from that group from the earliest-inserted origin.
  virtual std::vector<ReportingEndpoint> GetCandidateEndpointsForDelivery(
      const ReportingEndpointGroupKey& group_key) = 0;

  // Gets the status of all clients in the cache, including expired ones, as a
  // base::Value.
  virtual base::Value GetClientsAsValue() const = 0;

  // Gets the total number of endpoints in the cache across all origins.
  virtual size_t GetEndpointCount() const = 0;

  // Flush the contents of the cache to disk, if applicable.
  virtual void Flush() = 0;

  // Returns all V1 endpoints keyed by origin.
  virtual base::flat_map<url::Origin, std::vector<ReportingEndpoint>>
  GetV1ReportingEndpointsByOrigin() const = 0;

  // Returns the endpoint named |endpoint_name| for the reporting source, if it
  // was configured with the Reporting-Endpoints header, otherwise returns an
  // invalid ReportingEndpoint.
  // |reporting_source| must not be empty.
  virtual ReportingEndpoint GetV1EndpointForTesting(
      const base::UnguessableToken& reporting_source,
      const std::string& endpoint_name) const = 0;

  // Finds an endpoint for the given |group_key| and |url|, otherwise returns an
  // invalid ReportingEndpoint.
  virtual ReportingEndpoint GetEndpointForTesting(
      const ReportingEndpointGroupKey& group_key,
      const GURL& url) const = 0;

  // Returns whether an endpoint group with exactly the given properties exists
  // in the cache. If |expires| is base::Time(), it will not be checked.
  virtual bool EndpointGroupExistsForTesting(
      const ReportingEndpointGroupKey& group_key,
      OriginSubdomains include_subdomains,
      base::Time expires) const = 0;

  // Returns whether a client for the given (NIK, Origin) exists.
  virtual bool ClientExistsForTesting(
      const NetworkAnonymizationKey& network_anonymization_key,
      const url::Origin& origin) const = 0;

  // Returns number of endpoint groups.
  virtual size_t GetEndpointGroupCountForTesting() const = 0;

  // Returns number of clients.
  virtual size_t GetClientCountForTesting() const = 0;

  // Returns number of reporting source tokens associated with endpoints.
  virtual size_t GetReportingSourceCountForTesting() const = 0;

  // Sets an endpoint with the given properties in a group with the given
  // properties, bypassing header parsing. Note that the endpoint is not
  // guaranteed to exist in the cache after calling this function, if endpoint
  // eviction is triggered. Unlike the AddOrUpdate*() methods used in header
  // parsing, this method inserts or updates a single endpoint while leaving the
  // existing configuration for that origin intact.
  virtual void SetEndpointForTesting(const ReportingEndpointGroupKey& group_key,
                                     const GURL& url,
                                     OriginSubdomains include_subdomains,
                                     base::Time expires,
                                     int priority,
                                     int weight) = 0;

  // Sets a V1 named endpoint with the given key for `reporting_source`,
  // bypassing header parsing. This method inserts a single endpoint while
  // leaving the existing configuration for that source intact. If any
  // endpoints already exist for this source, then `isolation_info` must
  // match the value that was previously associated with it.
  virtual void SetV1EndpointForTesting(
      const ReportingEndpointGroupKey& group_key,
      const base::UnguessableToken& reporting_source,
      const IsolationInfo& isolation_info,
      const GURL& url) = 0;

  // Gets the isolation info associated with the reporting source of
  // `endpoint`, used when determining which credentials to send for a given
  // report. If that reporting source is nullopt, as when a report is being
  // delivered to a V0 reporting endpoint group, this always will return an
  // empty site.
  // NOTE(review): this method takes `endpoint` rather than a source token; the
  // reporting source is presumably read from the endpoint's group key —
  // confirm against the implementation.
  virtual IsolationInfo GetIsolationInfoForEndpoint(
      const ReportingEndpoint& endpoint) const = 0;
};

// Persistent storage for Reporting reports and clients.
//
// Abstract interface: all storage operations below are pure virtual. The type
// is default-constructible and non-copyable.
class NET_EXPORT ReportingCache::PersistentReportingStore {
 public:
  // Callback type handed to LoadReportingClients(); it receives the loaded
  // endpoints and endpoint groups.
  using ReportingClientsLoadedCallback =
      base::OnceCallback<void(std::vector<ReportingEndpoint>,
                              std::vector<CachedReportingEndpointGroup>)>;

  PersistentReportingStore() = default;

  PersistentReportingStore(const PersistentReportingStore&) = delete;
  PersistentReportingStore& operator=(const PersistentReportingStore&) = delete;

  virtual ~PersistentReportingStore() = default;

  // Initializes the store and retrieves stored endpoints and endpoint groups.
  // Called only once at startup.
  virtual void LoadReportingClients(
      ReportingClientsLoadedCallback loaded_callback) = 0;

  // Adds an endpoint to the store.
  virtual void AddReportingEndpoint(const ReportingEndpoint& endpoint) = 0;
  // Adds an endpoint group to the store.
  virtual void AddReportingEndpointGroup(
      const CachedReportingEndpointGroup& group) = 0;

  // Updates the access time of an endpoint group in the store.
  virtual void UpdateReportingEndpointGroupAccessTime(
      const CachedReportingEndpointGroup& group) = 0;

  // Updates the details of an endpoint in the store.
  virtual void UpdateReportingEndpointDetails(
      const ReportingEndpoint& endpoint) = 0;
  // Updates the details of an endpoint group in the store.
  virtual void UpdateReportingEndpointGroupDetails(
      const CachedReportingEndpointGroup& group) = 0;

  // Deletes an endpoint from the store.
  virtual void DeleteReportingEndpoint(const ReportingEndpoint& endpoint) = 0;
  // Deletes an endpoint group from the store.
  virtual void DeleteReportingEndpointGroup(
      const CachedReportingEndpointGroup& group) = 0;

  // TODO(chlily): methods to load, add, and delete reports will be added.

  // Flushes the store.
  virtual void Flush() = 0;
};

}  // namespace net

#endif  // NET_REPORTING_REPORTING_CACHE_H_