1 // Copyright 2024 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/device_bound_sessions/session_inclusion_rules.h"
6
7 #include <string_view>
8
9 #include "base/check.h"
10 #include "base/containers/adapters.h"
11 #include "base/logging.h"
12 #include "base/strings/string_util.h"
13 #include "net/base/ip_address.h"
14 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
15 #include "net/base/scheme_host_port_matcher_result.h"
16 #include "net/base/scheme_host_port_matcher_rule.h"
17 #include "net/base/url_util.h"
18 #include "net/device_bound_sessions/proto/storage.pb.h"
19 #include "net/device_bound_sessions/session.h"
20
21 namespace net::device_bound_sessions {
22
23 namespace {
24
IsIncludeSiteAllowed(const url::Origin & origin)25 bool IsIncludeSiteAllowed(const url::Origin& origin) {
26 // This is eTLD+1
27 const std::string domain_and_registry =
28 registry_controlled_domains::GetDomainAndRegistry(
29 origin, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
30 return !domain_and_registry.empty() && origin.host() == domain_and_registry;
31 }
32
AsInclusionResult(bool should_include)33 SessionInclusionRules::InclusionResult AsInclusionResult(bool should_include) {
34 return should_include ? SessionInclusionRules::kInclude
35 : SessionInclusionRules::kExclude;
36 }
37
38 // Types of characters valid in IPv6 addresses.
39 // Derived from logic in url::DoIPv6AddressToNumber() and url::DoParseIPv6().
IsValidIPv6Char(char c)40 bool IsValidIPv6Char(char c) {
41 return c == ':' || base::IsHexDigit(c) || c == '.' ||
42 // 'x' or 'X' is used in IPv4 to denote hex values, and can be used in
43 // parts of IPv6 addresses.
44 c == 'x' || c == 'X';
45 }
46
GetRuleTypeProto(SessionInclusionRules::InclusionResult result)47 proto::RuleType GetRuleTypeProto(
48 SessionInclusionRules::InclusionResult result) {
49 return result == SessionInclusionRules::InclusionResult::kInclude
50 ? proto::RuleType::INCLUDE
51 : proto::RuleType::EXCLUDE;
52 }
53
GetInclusionResult(proto::RuleType proto)54 std::optional<SessionInclusionRules::InclusionResult> GetInclusionResult(
55 proto::RuleType proto) {
56 if (proto == proto::RuleType::INCLUDE) {
57 return SessionInclusionRules::InclusionResult::kInclude;
58 } else if (proto == proto::RuleType::EXCLUDE) {
59 return SessionInclusionRules::InclusionResult::kExclude;
60 }
61
62 // proto = RULE_TYPE_UNSPECIFIED
63 return std::nullopt;
64 }
65
66 } // namespace
67
68 // Encapsulates a single rule which applies to the request URL.
69 struct SessionInclusionRules::UrlRule {
70 // URLs that match the rule will be subject to inclusion or exclusion as
71 // specified by the type.
72 InclusionResult rule_type;
73
74 // Domain or pattern that the URL must match. This must either be a
75 // full domain (host piece) or a pattern containing a wildcard in the
76 // most-specific (leftmost) label position followed by a dot and a non-eTLD.
77 // The matched strings follow SchemeHostPortMatcherRule's logic, but with
78 // some extra requirements for validity:
79 // - A leading wildcard * must be followed by a dot, so "*ple.com" is not
80 // acceptable.
81 // - "*.com" is not accepted because com is an eTLD. Same with "*.co.uk" and
82 // similar.
83 // - Multiple wildcards are not allowed.
84 // - Internal wildcards are not allowed, so "sub.*.example.com" does not
85 // work because the wildcard is not the leftmost component.
86 // - IP addresses also work if specified as the exact host, as described in
87 // SchemeHostPortMatcherRule.
88 std::unique_ptr<SchemeHostPortMatcherRule> host_matcher_rule;
89
90 // Prefix consisting of path components that the URL must match. Must begin
91 // with '/'. Wildcards are not allowed. Simply use "/" to match all paths.
92 std::string path_prefix;
93
operator ==(const UrlRule & lhs,const UrlRule & rhs)94 friend bool operator==(const UrlRule& lhs, const UrlRule& rhs) {
95 return lhs.rule_type == rhs.rule_type &&
96 lhs.path_prefix == rhs.path_prefix &&
97 lhs.host_matcher_rule->ToString() ==
98 rhs.host_matcher_rule->ToString();
99 }
100
101 // Returns whether the given `url` matches this rule. Note that this
102 // function does not check the scheme and port portions of the URL/origin.
103 bool MatchesHostAndPath(const GURL& url) const;
104 };
105
SessionInclusionRules(const url::Origin & origin)106 SessionInclusionRules::SessionInclusionRules(const url::Origin& origin)
107 : origin_(origin), may_include_site_(IsIncludeSiteAllowed(origin)) {}
108
109 SessionInclusionRules::SessionInclusionRules() = default;
110
111 SessionInclusionRules::~SessionInclusionRules() = default;
112
113 SessionInclusionRules::SessionInclusionRules(SessionInclusionRules&& other) =
114 default;
115
116 SessionInclusionRules& SessionInclusionRules::operator=(
117 SessionInclusionRules&& other) = default;
118
119 bool SessionInclusionRules::operator==(
120 const SessionInclusionRules& other) const = default;
121
SetIncludeSite(bool include_site)122 void SessionInclusionRules::SetIncludeSite(bool include_site) {
123 if (!may_include_site_) {
124 return;
125 }
126
127 if (!include_site) {
128 include_site_.reset();
129 return;
130 }
131
132 include_site_ = SchemefulSite(origin_);
133 }
134
AddUrlRuleIfValid(InclusionResult rule_type,const std::string & host_pattern,const std::string & path_prefix)135 bool SessionInclusionRules::AddUrlRuleIfValid(InclusionResult rule_type,
136 const std::string& host_pattern,
137 const std::string& path_prefix) {
138 if (path_prefix.empty() || path_prefix.front() != '/') {
139 return false;
140 }
141 if (host_pattern.empty()) {
142 return false;
143 }
144
145 // If only the origin is allowed, the host_pattern must be precisely its host.
146 bool host_pattern_is_host = host_pattern == origin_.host();
147 if (!may_include_site_ && !host_pattern_is_host) {
148 return false;
149 }
150
151 // Don't allow '*' anywhere besides the first character of the pattern.
152 size_t star_pos = host_pattern.rfind('*');
153 if (star_pos != std::string::npos && star_pos != 0) {
154 return false;
155 }
156 // Only allow wildcard if immediately followed by a dot.
157 bool has_initial_wildcard_label = host_pattern.starts_with("*.");
158 if (star_pos != std::string::npos && !has_initial_wildcard_label) {
159 return false;
160 }
161
162 std::string_view hostlike_part{host_pattern};
163 if (has_initial_wildcard_label) {
164 hostlike_part = hostlike_part.substr(2);
165 }
166
167 bool presumed_ipv6 = host_pattern.front() == '[';
168 if (presumed_ipv6 && host_pattern.back() != ']') {
169 return false;
170 }
171
172 // Allow only specific characters into SchemeHostPortMatcherRule parsing.
173 if (presumed_ipv6) {
174 // Leave out the brackets, but everything else must be a valid char.
175 std::string_view ipv6_address{host_pattern.begin() + 1,
176 host_pattern.end() - 1};
177 if (std::find_if_not(ipv6_address.begin(), ipv6_address.end(),
178 &IsValidIPv6Char) != ipv6_address.end()) {
179 return false;
180 }
181 } else {
182 // Note that this excludes a ':' character specifying a port number, even
183 // though SchemeHostPortMatcherRule supports it. Same for '/' (for the
184 // scheme or an IP block).
185 // TODO(chlily): Consider supporting port numbers.
186 if (!IsCanonicalizedHostCompliant(hostlike_part)) {
187 return false;
188 }
189 }
190
191 // Delegate the rest of the parsing to SchemeHostPortMatcherRule.
192 std::unique_ptr<SchemeHostPortMatcherRule> host_matcher_rule =
193 SchemeHostPortMatcherRule::FromUntrimmedRawString(host_pattern);
194 if (!host_matcher_rule) {
195 return false;
196 }
197
198 // Now that we know the host_pattern is at least the right shape, validate the
199 // remaining restrictions.
200
201 // Skip the eTLD lookups if the host pattern is an exact match.
202 if (host_pattern_is_host) {
203 url_rules_.emplace_back(rule_type, std::move(host_matcher_rule),
204 path_prefix);
205 return true;
206 }
207
208 std::string hostlike_part_domain =
209 registry_controlled_domains::GetDomainAndRegistry(
210 hostlike_part,
211 registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
212 // If there is a wildcard, we require the pattern to be a normal domain and
213 // not an eTLD.
214 if (has_initial_wildcard_label && hostlike_part_domain.empty()) {
215 return false;
216 }
217
218 // Validate that the host pattern is on the right origin/site.
219 // TODO(chlily): Perhaps we should use a cached value, but surely URL rule
220 // parsing only happens a small number of times.
221 std::string domain_and_registry =
222 registry_controlled_domains::GetDomainAndRegistry(
223 origin_, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
224 // The origin_ must have an eTLD+1, because if it didn't, then we'd know that
225 // !may_include_site_, and that would mean we'd have already returned early
226 // and would never get here.
227 CHECK(!domain_and_registry.empty());
228 if (hostlike_part_domain != domain_and_registry) {
229 return false;
230 }
231
232 url_rules_.emplace_back(rule_type, std::move(host_matcher_rule), path_prefix);
233 return true;
234 }
235
236 SessionInclusionRules::InclusionResult
EvaluateRequestUrl(const GURL & url) const237 SessionInclusionRules::EvaluateRequestUrl(const GURL& url) const {
238 bool same_origin = origin_.IsSameOriginWith(url);
239 if (!may_include_site_ && !same_origin) {
240 return SessionInclusionRules::kExclude;
241 }
242
243 // Evaluate against specific rules, most-recently-added first.
244 for (const UrlRule& rule : base::Reversed(url_rules_)) {
245 // The rule covers host and path, and scheme is checked too. We don't check
246 // port here, because in the !may_include_site_ case that's already covered
247 // by being same-origin, and in the may_include_site_ case it's ok for the
248 // port to differ.
249 if (rule.MatchesHostAndPath(url) &&
250 url.scheme_piece() == origin_.scheme()) {
251 return rule.rule_type;
252 }
253 }
254
255 // None of the specific rules apply. Evaluate against the basic include rule.
256 if (include_site_) {
257 return AsInclusionResult(SchemefulSite(url) == *include_site_);
258 }
259 return AsInclusionResult(same_origin);
260 }
261
MatchesHostAndPath(const GURL & url) const262 bool SessionInclusionRules::UrlRule::MatchesHostAndPath(const GURL& url) const {
263 if (host_matcher_rule->Evaluate(url) ==
264 SchemeHostPortMatcherResult::kNoMatch) {
265 return false;
266 }
267
268 std::string_view url_path = url.path_piece();
269 if (!url_path.starts_with(path_prefix)) {
270 return false;
271 }
272 // We must check the following to prevent a path prefix like "/foo" from
273 // erroneously matching a URL path like "/foobar/baz". There are 2 possible
274 // cases: `url_path` may be the same length as `path_prefix`, or `url_path`
275 // may be longer than `path_prefix`. In the first case, the two paths are
276 // equal and a match has been found. In the second case, we want to know
277 // whether the end of the `path_prefix` represents a full label in the path.
278 // Either the path_prefix string ends in '/' and is explicitly the end of a
279 // label, or the next character of `url_path` beyond the identical portion is
280 // '/'. Otherwise, reject the path as a false (incomplete label) prefix match.
281 CHECK(url_path.length() >= path_prefix.length());
282 if (url_path.length() > path_prefix.length() && path_prefix.back() != '/' &&
283 url_path[path_prefix.length()] != '/') {
284 return false;
285 }
286
287 return true;
288 }
289
num_url_rules_for_testing() const290 size_t SessionInclusionRules::num_url_rules_for_testing() const {
291 return url_rules_.size();
292 }
293
ToProto() const294 proto::SessionInclusionRules SessionInclusionRules::ToProto() const {
295 proto::SessionInclusionRules proto;
296 proto.set_origin(origin_.Serialize());
297 proto.set_do_include_site(include_site_.has_value());
298
299 // Note that the ordering of the rules (in terms of when they were added to
300 // the session) is preserved in the proto. Preserving the ordering is
301 // important to handle rules overlap - the latest rule wins.
302 for (auto& rule : url_rules_) {
303 proto::UrlRule rule_proto;
304 rule_proto.set_rule_type(GetRuleTypeProto(rule.rule_type));
305 rule_proto.set_host_matcher_rule(rule.host_matcher_rule->ToString());
306 rule_proto.set_path_prefix(rule.path_prefix);
307 proto.mutable_url_rules()->Add(std::move(rule_proto));
308 }
309
310 return proto;
311 }
312
313 // static:
CreateFromProto(const proto::SessionInclusionRules & proto)314 std::unique_ptr<SessionInclusionRules> SessionInclusionRules::CreateFromProto(
315 const proto::SessionInclusionRules& proto) {
316 if (!proto.has_origin() || !proto.has_do_include_site()) {
317 return nullptr;
318 }
319 url::Origin origin = url::Origin::Create(GURL(proto.origin()));
320 if (origin.opaque()) {
321 DLOG(ERROR) << "proto origin parse error: " << origin.GetDebugString();
322 return nullptr;
323 }
324
325 auto result = std::make_unique<SessionInclusionRules>(origin);
326 result->SetIncludeSite(proto.do_include_site());
327 for (const auto& rule_proto : proto.url_rules()) {
328 std::optional<InclusionResult> rule_type =
329 GetInclusionResult(rule_proto.rule_type());
330 if (!rule_type.has_value() ||
331 !result->AddUrlRuleIfValid(*rule_type, rule_proto.host_matcher_rule(),
332 rule_proto.path_prefix())) {
333 DLOG(ERROR) << "proto rule parse error: " << "type:"
334 << proto::RuleType_Name(rule_proto.rule_type()) << " "
335 << "matcher:" << rule_proto.host_matcher_rule() << " "
336 << "prefix:" << rule_proto.path_prefix();
337 return nullptr;
338 }
339 }
340
341 return result;
342 }
343
344 } // namespace net::device_bound_sessions
345