// Copyright 2024 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "net/device_bound_sessions/session_inclusion_rules.h" #include #include "base/check.h" #include "base/containers/adapters.h" #include "base/logging.h" #include "base/strings/string_util.h" #include "net/base/ip_address.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "net/base/scheme_host_port_matcher_result.h" #include "net/base/scheme_host_port_matcher_rule.h" #include "net/base/url_util.h" #include "net/device_bound_sessions/proto/storage.pb.h" #include "net/device_bound_sessions/session.h" namespace net::device_bound_sessions { namespace { bool IsIncludeSiteAllowed(const url::Origin& origin) { // This is eTLD+1 const std::string domain_and_registry = registry_controlled_domains::GetDomainAndRegistry( origin, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); return !domain_and_registry.empty() && origin.host() == domain_and_registry; } SessionInclusionRules::InclusionResult AsInclusionResult(bool should_include) { return should_include ? SessionInclusionRules::kInclude : SessionInclusionRules::kExclude; } // Types of characters valid in IPv6 addresses. // Derived from logic in url::DoIPv6AddressToNumber() and url::DoParseIPv6(). bool IsValidIPv6Char(char c) { return c == ':' || base::IsHexDigit(c) || c == '.' || // 'x' or 'X' is used in IPv4 to denote hex values, and can be used in // parts of IPv6 addresses. c == 'x' || c == 'X'; } proto::RuleType GetRuleTypeProto( SessionInclusionRules::InclusionResult result) { return result == SessionInclusionRules::InclusionResult::kInclude ? proto::RuleType::INCLUDE : proto::RuleType::EXCLUDE; } std::optional GetInclusionResult( proto::RuleType proto) { if (proto == proto::RuleType::INCLUDE) { return SessionInclusionRules::InclusionResult::kInclude; } else if (proto == proto::RuleType::EXCLUDE) { return SessionInclusionRules::InclusionResult::kExclude; } // proto = RULE_TYPE_UNSPECIFIED return std::nullopt; } } // namespace // Encapsulates a single rule which applies to the request URL. struct SessionInclusionRules::UrlRule { // URLs that match the rule will be subject to inclusion or exclusion as // specified by the type. InclusionResult rule_type; // Domain or pattern that the URL must match. This must either be a // full domain (host piece) or a pattern containing a wildcard in the // most-specific (leftmost) label position followed by a dot and a non-eTLD. // The matched strings follow SchemeHostPortMatcherRule's logic, but with // some extra requirements for validity: // - A leading wildcard * must be followed by a dot, so "*ple.com" is not // acceptable. // - "*.com" is not accepted because com is an eTLD. Same with "*.co.uk" and // similar. // - Multiple wildcards are not allowed. // - Internal wildcards are not allowed, so "sub.*.example.com" does not // work because the wildcard is not the leftmost component. // - IP addresses also work if specified as the exact host, as described in // SchemeHostPortMatcherRule. std::unique_ptr host_matcher_rule; // Prefix consisting of path components that the URL must match. Must begin // with '/'. Wildcards are not allowed. Simply use "/" to match all paths. std::string path_prefix; friend bool operator==(const UrlRule& lhs, const UrlRule& rhs) { return lhs.rule_type == rhs.rule_type && lhs.path_prefix == rhs.path_prefix && lhs.host_matcher_rule->ToString() == rhs.host_matcher_rule->ToString(); } // Returns whether the given `url` matches this rule. Note that this // function does not check the scheme and port portions of the URL/origin. bool MatchesHostAndPath(const GURL& url) const; }; SessionInclusionRules::SessionInclusionRules(const url::Origin& origin) : origin_(origin), may_include_site_(IsIncludeSiteAllowed(origin)) {} SessionInclusionRules::SessionInclusionRules() = default; SessionInclusionRules::~SessionInclusionRules() = default; SessionInclusionRules::SessionInclusionRules(SessionInclusionRules&& other) = default; SessionInclusionRules& SessionInclusionRules::operator=( SessionInclusionRules&& other) = default; bool SessionInclusionRules::operator==( const SessionInclusionRules& other) const = default; void SessionInclusionRules::SetIncludeSite(bool include_site) { if (!may_include_site_) { return; } if (!include_site) { include_site_.reset(); return; } include_site_ = SchemefulSite(origin_); } bool SessionInclusionRules::AddUrlRuleIfValid(InclusionResult rule_type, const std::string& host_pattern, const std::string& path_prefix) { if (path_prefix.empty() || path_prefix.front() != '/') { return false; } if (host_pattern.empty()) { return false; } // If only the origin is allowed, the host_pattern must be precisely its host. bool host_pattern_is_host = host_pattern == origin_.host(); if (!may_include_site_ && !host_pattern_is_host) { return false; } // Don't allow '*' anywhere besides the first character of the pattern. size_t star_pos = host_pattern.rfind('*'); if (star_pos != std::string::npos && star_pos != 0) { return false; } // Only allow wildcard if immediately followed by a dot. bool has_initial_wildcard_label = host_pattern.starts_with("*."); if (star_pos != std::string::npos && !has_initial_wildcard_label) { return false; } std::string_view hostlike_part{host_pattern}; if (has_initial_wildcard_label) { hostlike_part = hostlike_part.substr(2); } bool presumed_ipv6 = host_pattern.front() == '['; if (presumed_ipv6 && host_pattern.back() != ']') { return false; } // Allow only specific characters into SchemeHostPortMatcherRule parsing. if (presumed_ipv6) { // Leave out the brackets, but everything else must be a valid char. std::string_view ipv6_address{host_pattern.begin() + 1, host_pattern.end() - 1}; if (std::find_if_not(ipv6_address.begin(), ipv6_address.end(), &IsValidIPv6Char) != ipv6_address.end()) { return false; } } else { // Note that this excludes a ':' character specifying a port number, even // though SchemeHostPortMatcherRule supports it. Same for '/' (for the // scheme or an IP block). // TODO(chlily): Consider supporting port numbers. if (!IsCanonicalizedHostCompliant(hostlike_part)) { return false; } } // Delegate the rest of the parsing to SchemeHostPortMatcherRule. std::unique_ptr host_matcher_rule = SchemeHostPortMatcherRule::FromUntrimmedRawString(host_pattern); if (!host_matcher_rule) { return false; } // Now that we know the host_pattern is at least the right shape, validate the // remaining restrictions. // Skip the eTLD lookups if the host pattern is an exact match. if (host_pattern_is_host) { url_rules_.emplace_back(rule_type, std::move(host_matcher_rule), path_prefix); return true; } std::string hostlike_part_domain = registry_controlled_domains::GetDomainAndRegistry( hostlike_part, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); // If there is a wildcard, we require the pattern to be a normal domain and // not an eTLD. if (has_initial_wildcard_label && hostlike_part_domain.empty()) { return false; } // Validate that the host pattern is on the right origin/site. // TODO(chlily): Perhaps we should use a cached value, but surely URL rule // parsing only happens a small number of times. std::string domain_and_registry = registry_controlled_domains::GetDomainAndRegistry( origin_, registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); // The origin_ must have an eTLD+1, because if it didn't, then we'd know that // !may_include_site_, and that would mean we'd have already returned early // and would never get here. CHECK(!domain_and_registry.empty()); if (hostlike_part_domain != domain_and_registry) { return false; } url_rules_.emplace_back(rule_type, std::move(host_matcher_rule), path_prefix); return true; } SessionInclusionRules::InclusionResult SessionInclusionRules::EvaluateRequestUrl(const GURL& url) const { bool same_origin = origin_.IsSameOriginWith(url); if (!may_include_site_ && !same_origin) { return SessionInclusionRules::kExclude; } // Evaluate against specific rules, most-recently-added first. for (const UrlRule& rule : base::Reversed(url_rules_)) { // The rule covers host and path, and scheme is checked too. We don't check // port here, because in the !may_include_site_ case that's already covered // by being same-origin, and in the may_include_site_ case it's ok for the // port to differ. if (rule.MatchesHostAndPath(url) && url.scheme_piece() == origin_.scheme()) { return rule.rule_type; } } // None of the specific rules apply. Evaluate against the basic include rule. if (include_site_) { return AsInclusionResult(SchemefulSite(url) == *include_site_); } return AsInclusionResult(same_origin); } bool SessionInclusionRules::UrlRule::MatchesHostAndPath(const GURL& url) const { if (host_matcher_rule->Evaluate(url) == SchemeHostPortMatcherResult::kNoMatch) { return false; } std::string_view url_path = url.path_piece(); if (!url_path.starts_with(path_prefix)) { return false; } // We must check the following to prevent a path prefix like "/foo" from // erroneously matching a URL path like "/foobar/baz". There are 2 possible // cases: `url_path` may be the same length as `path_prefix`, or `url_path` // may be longer than `path_prefix`. In the first case, the two paths are // equal and a match has been found. In the second case, we want to know // whether the end of the `path_prefix` represents a full label in the path. // Either the path_prefix string ends in '/' and is explicitly the end of a // label, or the next character of `url_path` beyond the identical portion is // '/'. Otherwise, reject the path as a false (incomplete label) prefix match. CHECK(url_path.length() >= path_prefix.length()); if (url_path.length() > path_prefix.length() && path_prefix.back() != '/' && url_path[path_prefix.length()] != '/') { return false; } return true; } size_t SessionInclusionRules::num_url_rules_for_testing() const { return url_rules_.size(); } proto::SessionInclusionRules SessionInclusionRules::ToProto() const { proto::SessionInclusionRules proto; proto.set_origin(origin_.Serialize()); proto.set_do_include_site(include_site_.has_value()); // Note that the ordering of the rules (in terms of when they were added to // the session) is preserved in the proto. Preserving the ordering is // important to handle rules overlap - the latest rule wins. for (auto& rule : url_rules_) { proto::UrlRule rule_proto; rule_proto.set_rule_type(GetRuleTypeProto(rule.rule_type)); rule_proto.set_host_matcher_rule(rule.host_matcher_rule->ToString()); rule_proto.set_path_prefix(rule.path_prefix); proto.mutable_url_rules()->Add(std::move(rule_proto)); } return proto; } // static: std::unique_ptr SessionInclusionRules::CreateFromProto( const proto::SessionInclusionRules& proto) { if (!proto.has_origin() || !proto.has_do_include_site()) { return nullptr; } url::Origin origin = url::Origin::Create(GURL(proto.origin())); if (origin.opaque()) { DLOG(ERROR) << "proto origin parse error: " << origin.GetDebugString(); return nullptr; } auto result = std::make_unique(origin); result->SetIncludeSite(proto.do_include_site()); for (const auto& rule_proto : proto.url_rules()) { std::optional rule_type = GetInclusionResult(rule_proto.rule_type()); if (!rule_type.has_value() || !result->AddUrlRuleIfValid(*rule_type, rule_proto.host_matcher_rule(), rule_proto.path_prefix())) { DLOG(ERROR) << "proto rule parse error: " << "type:" << proto::RuleType_Name(rule_proto.rule_type()) << " " << "matcher:" << rule_proto.host_matcher_rule() << " " << "prefix:" << rule_proto.path_prefix(); return nullptr; } } return result; } } // namespace net::device_bound_sessions