1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "quiche/quic/core/http/spdy_server_push_utils.h"
6
7 #include "absl/strings/string_view.h"
8 #include "url/gurl.h"
9
10 using spdy::Http2HeaderBlock;
11
12 namespace quic {
13
14 // static
GetPromisedUrlFromHeaders(const Http2HeaderBlock & headers)15 std::string SpdyServerPushUtils::GetPromisedUrlFromHeaders(
16 const Http2HeaderBlock& headers) {
17 // RFC 7540, Section 8.1.2.3: All HTTP/2 requests MUST include exactly
18 // one valid value for the ":method", ":scheme", and ":path" pseudo-header
19 // fields, unless it is a CONNECT request.
20
21 // RFC 7540, Section 8.2.1: The header fields in PUSH_PROMISE and any
22 // subsequent CONTINUATION frames MUST be a valid and complete set of request
23 // header fields (Section 8.1.2.3). The server MUST include a method in the
24 // ":method" pseudo-header field that is safe and cacheable.
25 //
26 // RFC 7231, Section 4.2.1: Of the request methods defined by this
27 // specification, the GET, HEAD, OPTIONS, and TRACE methods are defined to be
28 // safe.
29 //
30 // RFC 7231, Section 4.2.1: ... this specification defines GET, HEAD, and
31 // POST as cacheable, ...
32 //
33 // So the only methods allowed in a PUSH_PROMISE are GET and HEAD.
34 Http2HeaderBlock::const_iterator it = headers.find(":method");
35 if (it == headers.end() || (it->second != "GET" && it->second != "HEAD")) {
36 return std::string();
37 }
38
39 it = headers.find(":scheme");
40 if (it == headers.end() || it->second.empty()) {
41 return std::string();
42 }
43 absl::string_view scheme = it->second;
44
45 // RFC 7540, Section 8.2: The server MUST include a value in the
46 // ":authority" pseudo-header field for which the server is authoritative
47 // (see Section 10.1).
48 it = headers.find(":authority");
49 if (it == headers.end() || it->second.empty()) {
50 return std::string();
51 }
52 absl::string_view authority = it->second;
53
54 // RFC 7540, Section 8.1.2.3 requires that the ":path" pseudo-header MUST
55 // NOT be empty for "http" or "https" URIs;
56 //
57 // However, to ensure the scheme is consistently canonicalized, that check
58 // is deferred to implementations in QuicUrlUtils::GetPushPromiseUrl().
59 it = headers.find(":path");
60 if (it == headers.end()) {
61 return std::string();
62 }
63 absl::string_view path = it->second;
64
65 return GetPushPromiseUrl(scheme, authority, path);
66 }
67
68 // static
GetPromisedHostNameFromHeaders(const Http2HeaderBlock & headers)69 std::string SpdyServerPushUtils::GetPromisedHostNameFromHeaders(
70 const Http2HeaderBlock& headers) {
71 // TODO(fayang): Consider just checking out the value of the ":authority" key
72 // in headers.
73 return GURL(GetPromisedUrlFromHeaders(headers)).host();
74 }
75
76 // static
PromisedUrlIsValid(const Http2HeaderBlock & headers)77 bool SpdyServerPushUtils::PromisedUrlIsValid(const Http2HeaderBlock& headers) {
78 std::string url(GetPromisedUrlFromHeaders(headers));
79 return !url.empty() && GURL(url).is_valid();
80 }
81
82 // static
GetPushPromiseUrl(absl::string_view scheme,absl::string_view authority,absl::string_view path)83 std::string SpdyServerPushUtils::GetPushPromiseUrl(absl::string_view scheme,
84 absl::string_view authority,
85 absl::string_view path) {
86 // RFC 7540, Section 8.1.2.3: The ":path" pseudo-header field includes the
87 // path and query parts of the target URI (the "path-absolute" production
88 // and optionally a '?' character followed by the "query" production (see
89 // Sections 3.3 and 3.4 of RFC3986). A request in asterisk form includes the
90 // value '*' for the ":path" pseudo-header field.
91 //
92 // This pseudo-header field MUST NOT be empty for "http" or "https" URIs;
93 // "http" or "https" URIs that do not contain a path MUST include a value of
94 // '/'. The exception to this rule is an OPTIONS request for an "http" or
95 // "https" URI that does not include a path component; these MUST include a
96 // ":path" pseudo-header with a value of '*' (see RFC7230, Section 5.3.4).
97 //
98 // In addition to the above restriction from RFC 7540, note that RFC3986
99 // defines the "path-absolute" construction as starting with "/" but not "//".
100 //
101 // RFC 7540, Section 8.2.1: The header fields in PUSH_PROMISE and any
102 // subsequent CONTINUATION frames MUST be a valid and complete set of request
103 // header fields (Section 8.1.2.3). The server MUST include a method in the
104 // ":method" pseudo-header field that is safe and cacheable.
105 //
106 // RFC 7231, Section 4.2.1:
107 // ... this specification defines GET, HEAD, and POST as cacheable, ...
108 //
109 // Since the OPTIONS method is not cacheable, it cannot be the method of a
110 // PUSH_PROMISE. Therefore, the exception mentioned in RFC 7540, Section
111 // 8.1.2.3 about OPTIONS requests does not apply here (i.e. ":path" cannot be
112 // "*").
113 if (path.empty() || path[0] != '/' || (path.size() >= 2 && path[1] == '/')) {
114 return std::string();
115 }
116
117 // Validate the scheme; this is to ensure a scheme of "foo://bar" is not
118 // parsed as a URL of "foo://bar://baz" when combined with a host of "baz".
119 std::string canonical_scheme;
120 url::StdStringCanonOutput canon_scheme_output(&canonical_scheme);
121 url::Component canon_component;
122 url::Component scheme_component(0, scheme.size());
123
124 if (!url::CanonicalizeScheme(scheme.data(), scheme_component,
125 &canon_scheme_output, &canon_component) ||
126 !canon_component.is_nonempty() || canon_component.begin != 0) {
127 return std::string();
128 }
129 canonical_scheme.resize(canon_component.len + 1);
130
131 // Validate the authority; this is to ensure an authority such as
132 // "host/path" is not accepted, as when combined with a scheme like
133 // "http://", could result in a URL of "http://host/path".
134 url::Component auth_component(0, authority.size());
135 url::Component username_component;
136 url::Component password_component;
137 url::Component host_component;
138 url::Component port_component;
139
140 url::ParseAuthority(authority.data(), auth_component, &username_component,
141 &password_component, &host_component, &port_component);
142
143 // RFC 7540, Section 8.1.2.3: The authority MUST NOT include the deprecated
144 // "userinfo" subcomponent for "http" or "https" schemed URIs.
145 //
146 // Note: Although |canonical_scheme| has not yet been checked for that, as
147 // it is performed later in processing, only "http" and "https" schemed
148 // URIs are supported for PUSH.
149 if (username_component.is_valid() || password_component.is_valid()) {
150 return std::string();
151 }
152
153 // Failed parsing or no host present. ParseAuthority() will ensure that
154 // host_component + port_component cover the entire string, if
155 // username_component and password_component are not present.
156 if (!host_component.is_nonempty()) {
157 return std::string();
158 }
159
160 // Validate the port (if present; it's optional).
161 int parsed_port_number = url::PORT_INVALID;
162 if (port_component.is_nonempty()) {
163 parsed_port_number = url::ParsePort(authority.data(), port_component);
164 if (parsed_port_number < 0 && parsed_port_number != url::PORT_UNSPECIFIED) {
165 return std::string();
166 }
167 }
168
169 // Validate the host by attempting to canonicalize it. Invalid characters
170 // will result in a canonicalization failure (e.g. '/')
171 std::string canon_host;
172 url::StdStringCanonOutput canon_host_output(&canon_host);
173 canon_component.reset();
174 if (!url::CanonicalizeHost(authority.data(), host_component,
175 &canon_host_output, &canon_component) ||
176 !canon_component.is_nonempty() || canon_component.begin != 0) {
177 return std::string();
178 }
179
180 // At this point, "authority" has been validated to either be of the form
181 // 'host:port' or 'host', with 'host' being a valid domain or IP address,
182 // and 'port' (if present), being a valid port. Attempt to construct a
183 // URL of just the (scheme, host, port), which should be safe and will not
184 // result in ambiguous parsing.
185 //
186 // This also enforces that all PUSHed URLs are either HTTP or HTTPS-schemed
187 // URIs, consistent with the other restrictions enforced above.
188 //
189 // Note: url::CanonicalizeScheme() will have added the ':' to
190 // |canonical_scheme|.
191 GURL origin_url(canonical_scheme + "//" + std::string(authority));
192 if (!origin_url.is_valid() || !origin_url.SchemeIsHTTPOrHTTPS() ||
193 // The following checks are merely defense in depth.
194 origin_url.has_username() || origin_url.has_password() ||
195 (origin_url.has_path() && origin_url.path_piece() != "/") ||
196 origin_url.has_query() || origin_url.has_ref()) {
197 return std::string();
198 }
199
200 // Attempt to parse the path.
201 std::string spec = origin_url.GetWithEmptyPath().spec();
202 spec.pop_back(); // Remove the '/', as ":path" must contain it.
203 spec.append(std::string(path));
204
205 // Attempt to parse the full URL, with the path as well. Ensure there is no
206 // fragment to the query.
207 GURL full_url(spec);
208 if (!full_url.is_valid() || full_url.has_ref()) {
209 return std::string();
210 }
211
212 return full_url.spec();
213 }
214
215 } // namespace quic
216