• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2024 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/device_bound_sessions/session_inclusion_rules.h"
6 
7 #include <string_view>
8 
9 #include "base/check.h"
10 #include "base/containers/adapters.h"
11 #include "base/logging.h"
12 #include "base/strings/string_util.h"
13 #include "net/base/ip_address.h"
14 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
15 #include "net/base/scheme_host_port_matcher_result.h"
16 #include "net/base/scheme_host_port_matcher_rule.h"
17 #include "net/base/url_util.h"
18 #include "net/device_bound_sessions/proto/storage.pb.h"
19 #include "net/device_bound_sessions/session.h"
20 
21 namespace net::device_bound_sessions {
22 
23 namespace {
24 
IsIncludeSiteAllowed(const url::Origin & origin)25 bool IsIncludeSiteAllowed(const url::Origin& origin) {
26   // This is eTLD+1
27   const std::string domain_and_registry =
28       registry_controlled_domains::GetDomainAndRegistry(
29           origin, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
30   return !domain_and_registry.empty() && origin.host() == domain_and_registry;
31 }
32 
AsInclusionResult(bool should_include)33 SessionInclusionRules::InclusionResult AsInclusionResult(bool should_include) {
34   return should_include ? SessionInclusionRules::kInclude
35                         : SessionInclusionRules::kExclude;
36 }
37 
38 // Types of characters valid in IPv6 addresses.
39 // Derived from logic in url::DoIPv6AddressToNumber() and url::DoParseIPv6().
IsValidIPv6Char(char c)40 bool IsValidIPv6Char(char c) {
41   return c == ':' || base::IsHexDigit(c) || c == '.' ||
42          // 'x' or 'X' is used in IPv4 to denote hex values, and can be used in
43          // parts of IPv6 addresses.
44          c == 'x' || c == 'X';
45 }
46 
GetRuleTypeProto(SessionInclusionRules::InclusionResult result)47 proto::RuleType GetRuleTypeProto(
48     SessionInclusionRules::InclusionResult result) {
49   return result == SessionInclusionRules::InclusionResult::kInclude
50              ? proto::RuleType::INCLUDE
51              : proto::RuleType::EXCLUDE;
52 }
53 
GetInclusionResult(proto::RuleType proto)54 std::optional<SessionInclusionRules::InclusionResult> GetInclusionResult(
55     proto::RuleType proto) {
56   if (proto == proto::RuleType::INCLUDE) {
57     return SessionInclusionRules::InclusionResult::kInclude;
58   } else if (proto == proto::RuleType::EXCLUDE) {
59     return SessionInclusionRules::InclusionResult::kExclude;
60   }
61 
62   // proto = RULE_TYPE_UNSPECIFIED
63   return std::nullopt;
64 }
65 
66 }  // namespace
67 
68 // Encapsulates a single rule which applies to the request URL.
69 struct SessionInclusionRules::UrlRule {
70   // URLs that match the rule will be subject to inclusion or exclusion as
71   // specified by the type.
72   InclusionResult rule_type;
73 
74   // Domain or pattern that the URL must match. This must either be a
75   // full domain (host piece) or a pattern containing a wildcard in the
76   // most-specific (leftmost) label position followed by a dot and a non-eTLD.
77   // The matched strings follow SchemeHostPortMatcherRule's logic, but with
78   // some extra requirements for validity:
79   // - A leading wildcard * must be followed by a dot, so "*ple.com" is not
80   //   acceptable.
81   // - "*.com" is not accepted because com is an eTLD. Same with "*.co.uk" and
82   //   similar.
83   // - Multiple wildcards are not allowed.
84   // - Internal wildcards are not allowed, so "sub.*.example.com" does not
85   //   work because the wildcard is not the leftmost component.
86   // - IP addresses also work if specified as the exact host, as described in
87   //   SchemeHostPortMatcherRule.
88   std::unique_ptr<SchemeHostPortMatcherRule> host_matcher_rule;
89 
90   // Prefix consisting of path components that the URL must match. Must begin
91   // with '/'. Wildcards are not allowed. Simply use "/" to match all paths.
92   std::string path_prefix;
93 
operator ==(const UrlRule & lhs,const UrlRule & rhs)94   friend bool operator==(const UrlRule& lhs, const UrlRule& rhs) {
95     return lhs.rule_type == rhs.rule_type &&
96            lhs.path_prefix == rhs.path_prefix &&
97            lhs.host_matcher_rule->ToString() ==
98                rhs.host_matcher_rule->ToString();
99   }
100 
101   // Returns whether the given `url` matches this rule. Note that this
102   // function does not check the scheme and port portions of the URL/origin.
103   bool MatchesHostAndPath(const GURL& url) const;
104 };
105 
SessionInclusionRules(const url::Origin & origin)106 SessionInclusionRules::SessionInclusionRules(const url::Origin& origin)
107     : origin_(origin), may_include_site_(IsIncludeSiteAllowed(origin)) {}
108 
109 SessionInclusionRules::SessionInclusionRules() = default;
110 
111 SessionInclusionRules::~SessionInclusionRules() = default;
112 
113 SessionInclusionRules::SessionInclusionRules(SessionInclusionRules&& other) =
114     default;
115 
116 SessionInclusionRules& SessionInclusionRules::operator=(
117     SessionInclusionRules&& other) = default;
118 
119 bool SessionInclusionRules::operator==(
120     const SessionInclusionRules& other) const = default;
121 
SetIncludeSite(bool include_site)122 void SessionInclusionRules::SetIncludeSite(bool include_site) {
123   if (!may_include_site_) {
124     return;
125   }
126 
127   if (!include_site) {
128     include_site_.reset();
129     return;
130   }
131 
132   include_site_ = SchemefulSite(origin_);
133 }
134 
AddUrlRuleIfValid(InclusionResult rule_type,const std::string & host_pattern,const std::string & path_prefix)135 bool SessionInclusionRules::AddUrlRuleIfValid(InclusionResult rule_type,
136                                               const std::string& host_pattern,
137                                               const std::string& path_prefix) {
138   if (path_prefix.empty() || path_prefix.front() != '/') {
139     return false;
140   }
141   if (host_pattern.empty()) {
142     return false;
143   }
144 
145   // If only the origin is allowed, the host_pattern must be precisely its host.
146   bool host_pattern_is_host = host_pattern == origin_.host();
147   if (!may_include_site_ && !host_pattern_is_host) {
148     return false;
149   }
150 
151   // Don't allow '*' anywhere besides the first character of the pattern.
152   size_t star_pos = host_pattern.rfind('*');
153   if (star_pos != std::string::npos && star_pos != 0) {
154     return false;
155   }
156   // Only allow wildcard if immediately followed by a dot.
157   bool has_initial_wildcard_label = host_pattern.starts_with("*.");
158   if (star_pos != std::string::npos && !has_initial_wildcard_label) {
159     return false;
160   }
161 
162   std::string_view hostlike_part{host_pattern};
163   if (has_initial_wildcard_label) {
164     hostlike_part = hostlike_part.substr(2);
165   }
166 
167   bool presumed_ipv6 = host_pattern.front() == '[';
168   if (presumed_ipv6 && host_pattern.back() != ']') {
169     return false;
170   }
171 
172   // Allow only specific characters into SchemeHostPortMatcherRule parsing.
173   if (presumed_ipv6) {
174     // Leave out the brackets, but everything else must be a valid char.
175     std::string_view ipv6_address{host_pattern.begin() + 1,
176                                   host_pattern.end() - 1};
177     if (std::find_if_not(ipv6_address.begin(), ipv6_address.end(),
178                          &IsValidIPv6Char) != ipv6_address.end()) {
179       return false;
180     }
181   } else {
182     // Note that this excludes a ':' character specifying a port number, even
183     // though SchemeHostPortMatcherRule supports it. Same for '/' (for the
184     // scheme or an IP block).
185     // TODO(chlily): Consider supporting port numbers.
186     if (!IsCanonicalizedHostCompliant(hostlike_part)) {
187       return false;
188     }
189   }
190 
191   // Delegate the rest of the parsing to SchemeHostPortMatcherRule.
192   std::unique_ptr<SchemeHostPortMatcherRule> host_matcher_rule =
193       SchemeHostPortMatcherRule::FromUntrimmedRawString(host_pattern);
194   if (!host_matcher_rule) {
195     return false;
196   }
197 
198   // Now that we know the host_pattern is at least the right shape, validate the
199   // remaining restrictions.
200 
201   // Skip the eTLD lookups if the host pattern is an exact match.
202   if (host_pattern_is_host) {
203     url_rules_.emplace_back(rule_type, std::move(host_matcher_rule),
204                             path_prefix);
205     return true;
206   }
207 
208   std::string hostlike_part_domain =
209       registry_controlled_domains::GetDomainAndRegistry(
210           hostlike_part,
211           registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
212   // If there is a wildcard, we require the pattern to be a normal domain and
213   // not an eTLD.
214   if (has_initial_wildcard_label && hostlike_part_domain.empty()) {
215     return false;
216   }
217 
218   // Validate that the host pattern is on the right origin/site.
219   // TODO(chlily): Perhaps we should use a cached value, but surely URL rule
220   // parsing only happens a small number of times.
221   std::string domain_and_registry =
222       registry_controlled_domains::GetDomainAndRegistry(
223           origin_, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
224   // The origin_ must have an eTLD+1, because if it didn't, then we'd know that
225   // !may_include_site_, and that would mean we'd have already returned early
226   // and would never get here.
227   CHECK(!domain_and_registry.empty());
228   if (hostlike_part_domain != domain_and_registry) {
229     return false;
230   }
231 
232   url_rules_.emplace_back(rule_type, std::move(host_matcher_rule), path_prefix);
233   return true;
234 }
235 
236 SessionInclusionRules::InclusionResult
EvaluateRequestUrl(const GURL & url) const237 SessionInclusionRules::EvaluateRequestUrl(const GURL& url) const {
238   bool same_origin = origin_.IsSameOriginWith(url);
239   if (!may_include_site_ && !same_origin) {
240     return SessionInclusionRules::kExclude;
241   }
242 
243   // Evaluate against specific rules, most-recently-added first.
244   for (const UrlRule& rule : base::Reversed(url_rules_)) {
245     // The rule covers host and path, and scheme is checked too. We don't check
246     // port here, because in the !may_include_site_ case that's already covered
247     // by being same-origin, and in the may_include_site_ case it's ok for the
248     // port to differ.
249     if (rule.MatchesHostAndPath(url) &&
250         url.scheme_piece() == origin_.scheme()) {
251       return rule.rule_type;
252     }
253   }
254 
255   // None of the specific rules apply. Evaluate against the basic include rule.
256   if (include_site_) {
257     return AsInclusionResult(SchemefulSite(url) == *include_site_);
258   }
259   return AsInclusionResult(same_origin);
260 }
261 
MatchesHostAndPath(const GURL & url) const262 bool SessionInclusionRules::UrlRule::MatchesHostAndPath(const GURL& url) const {
263   if (host_matcher_rule->Evaluate(url) ==
264       SchemeHostPortMatcherResult::kNoMatch) {
265     return false;
266   }
267 
268   std::string_view url_path = url.path_piece();
269   if (!url_path.starts_with(path_prefix)) {
270     return false;
271   }
272   // We must check the following to prevent a path prefix like "/foo" from
273   // erroneously matching a URL path like "/foobar/baz". There are 2 possible
274   // cases: `url_path` may be the same length as `path_prefix`, or `url_path`
275   // may be longer than `path_prefix`. In the first case, the two paths are
276   // equal and a match has been found. In the second case, we want to know
277   // whether the end of the `path_prefix` represents a full label in the path.
278   // Either the path_prefix string ends in '/' and is explicitly the end of a
279   // label, or the next character of `url_path` beyond the identical portion is
280   // '/'. Otherwise, reject the path as a false (incomplete label) prefix match.
281   CHECK(url_path.length() >= path_prefix.length());
282   if (url_path.length() > path_prefix.length() && path_prefix.back() != '/' &&
283       url_path[path_prefix.length()] != '/') {
284     return false;
285   }
286 
287   return true;
288 }
289 
num_url_rules_for_testing() const290 size_t SessionInclusionRules::num_url_rules_for_testing() const {
291   return url_rules_.size();
292 }
293 
ToProto() const294 proto::SessionInclusionRules SessionInclusionRules::ToProto() const {
295   proto::SessionInclusionRules proto;
296   proto.set_origin(origin_.Serialize());
297   proto.set_do_include_site(include_site_.has_value());
298 
299   // Note that the ordering of the rules (in terms of when they were added to
300   // the session) is preserved in the proto. Preserving the ordering is
301   // important to handle rules overlap - the latest rule wins.
302   for (auto& rule : url_rules_) {
303     proto::UrlRule rule_proto;
304     rule_proto.set_rule_type(GetRuleTypeProto(rule.rule_type));
305     rule_proto.set_host_matcher_rule(rule.host_matcher_rule->ToString());
306     rule_proto.set_path_prefix(rule.path_prefix);
307     proto.mutable_url_rules()->Add(std::move(rule_proto));
308   }
309 
310   return proto;
311 }
312 
313 // static:
CreateFromProto(const proto::SessionInclusionRules & proto)314 std::unique_ptr<SessionInclusionRules> SessionInclusionRules::CreateFromProto(
315     const proto::SessionInclusionRules& proto) {
316   if (!proto.has_origin() || !proto.has_do_include_site()) {
317     return nullptr;
318   }
319   url::Origin origin = url::Origin::Create(GURL(proto.origin()));
320   if (origin.opaque()) {
321     DLOG(ERROR) << "proto origin parse error: " << origin.GetDebugString();
322     return nullptr;
323   }
324 
325   auto result = std::make_unique<SessionInclusionRules>(origin);
326   result->SetIncludeSite(proto.do_include_site());
327   for (const auto& rule_proto : proto.url_rules()) {
328     std::optional<InclusionResult> rule_type =
329         GetInclusionResult(rule_proto.rule_type());
330     if (!rule_type.has_value() ||
331         !result->AddUrlRuleIfValid(*rule_type, rule_proto.host_matcher_rule(),
332                                    rule_proto.path_prefix())) {
333       DLOG(ERROR) << "proto rule parse error: " << "type:"
334                   << proto::RuleType_Name(rule_proto.rule_type()) << " "
335                   << "matcher:" << rule_proto.host_matcher_rule() << " "
336                   << "prefix:" << rule_proto.path_prefix();
337       return nullptr;
338     }
339   }
340 
341   return result;
342 }
343 
344 }  // namespace net::device_bound_sessions
345