1 // Copyright 2021 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/base/proxy_string_util.h"
6
7 #include <string>
8 #include <string_view>
9 #include <vector>
10
11 #include "base/check.h"
12 #include "base/notreached.h"
13 #include "base/strings/strcat.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/string_util.h"
16 #include "build/buildflag.h"
17 #include "net/base/proxy_server.h"
18 #include "net/base/url_util.h"
19 #include "net/http/http_util.h"
20 #include "net/net_buildflags.h"
21 #include "url/third_party/mozilla/url_parse.h"
22
23 namespace net {
24
25 namespace {
26
27 // Parses the proxy type from a PAC string, to a ProxyServer::Scheme.
28 // This mapping is case-insensitive. If no type could be matched
29 // returns SCHEME_INVALID.
GetSchemeFromPacTypeInternal(std::string_view type)30 ProxyServer::Scheme GetSchemeFromPacTypeInternal(std::string_view type) {
31 if (base::EqualsCaseInsensitiveASCII(type, "proxy")) {
32 return ProxyServer::SCHEME_HTTP;
33 }
34 if (base::EqualsCaseInsensitiveASCII(type, "socks")) {
35 // Default to v4 for compatibility. This is because the SOCKS4 vs SOCKS5
36 // notation didn't originally exist, so if a client returns SOCKS they
37 // really meant SOCKS4.
38 return ProxyServer::SCHEME_SOCKS4;
39 }
40 if (base::EqualsCaseInsensitiveASCII(type, "socks4")) {
41 return ProxyServer::SCHEME_SOCKS4;
42 }
43 if (base::EqualsCaseInsensitiveASCII(type, "socks5")) {
44 return ProxyServer::SCHEME_SOCKS5;
45 }
46 if (base::EqualsCaseInsensitiveASCII(type, "https")) {
47 return ProxyServer::SCHEME_HTTPS;
48 }
49
50 return ProxyServer::SCHEME_INVALID;
51 }
52
ConstructHostPortString(std::string_view hostname,uint16_t port)53 std::string ConstructHostPortString(std::string_view hostname, uint16_t port) {
54 DCHECK(!hostname.empty());
55 DCHECK((hostname.front() == '[' && hostname.back() == ']') ||
56 hostname.find(":") == std::string_view::npos);
57
58 return base::StrCat({hostname, ":", base::NumberToString(port)});
59 }
60
61 std::tuple<std::string_view, std::string_view>
PacResultElementToSchemeAndHostPort(std::string_view pac_result_element)62 PacResultElementToSchemeAndHostPort(std::string_view pac_result_element) {
63 // Trim the leading/trailing whitespace.
64 pac_result_element = HttpUtil::TrimLWS(pac_result_element);
65
66 // Input should match:
67 // ( <type> 1*(LWS) <host-and-port> )
68
69 // Start by finding the first space (if any).
70 size_t space = 0;
71 for (; space < pac_result_element.size(); space++) {
72 if (HttpUtil::IsLWS(pac_result_element[space])) {
73 break;
74 }
75 }
76 // Everything to the left of the space is the scheme.
77 std::string_view scheme = pac_result_element.substr(0, space);
78
79 // And everything to the right of the space is the
80 // <host>[":" <port>].
81 std::string_view host_and_port = pac_result_element.substr(space);
82 return std::make_tuple(scheme, host_and_port);
83 }
84
85 } // namespace
86
PacResultElementToProxyChain(std::string_view pac_result_element)87 ProxyChain PacResultElementToProxyChain(std::string_view pac_result_element) {
88 // Proxy chains are not supported in PAC strings, so this is just parsed
89 // as a single server.
90 auto [type, host_and_port] =
91 PacResultElementToSchemeAndHostPort(pac_result_element);
92 if (base::EqualsCaseInsensitiveASCII(type, "direct") &&
93 host_and_port.empty()) {
94 return ProxyChain::Direct();
95 }
96 return ProxyChain(PacResultElementToProxyServer(pac_result_element));
97 }
98
PacResultElementToProxyServer(std::string_view pac_result_element)99 ProxyServer PacResultElementToProxyServer(std::string_view pac_result_element) {
100 auto [type, host_and_port] =
101 PacResultElementToSchemeAndHostPort(pac_result_element);
102 ProxyServer::Scheme scheme = GetSchemeFromPacTypeInternal(type);
103 return ProxySchemeHostAndPortToProxyServer(scheme, host_and_port);
104 }
105
ProxyServerToPacResultElement(const ProxyServer & proxy_server)106 std::string ProxyServerToPacResultElement(const ProxyServer& proxy_server) {
107 switch (proxy_server.scheme()) {
108 case ProxyServer::SCHEME_HTTP:
109 return std::string("PROXY ") +
110 ConstructHostPortString(proxy_server.GetHost(),
111 proxy_server.GetPort());
112 case ProxyServer::SCHEME_SOCKS4:
113 // For compatibility send SOCKS instead of SOCKS4.
114 return std::string("SOCKS ") +
115 ConstructHostPortString(proxy_server.GetHost(),
116 proxy_server.GetPort());
117 case ProxyServer::SCHEME_SOCKS5:
118 return std::string("SOCKS5 ") +
119 ConstructHostPortString(proxy_server.GetHost(),
120 proxy_server.GetPort());
121 case ProxyServer::SCHEME_HTTPS:
122 return std::string("HTTPS ") +
123 ConstructHostPortString(proxy_server.GetHost(),
124 proxy_server.GetPort());
125 case ProxyServer::SCHEME_QUIC:
126 return std::string("QUIC ") +
127 ConstructHostPortString(proxy_server.GetHost(),
128 proxy_server.GetPort());
129 default:
130 // Got called with an invalid scheme.
131 NOTREACHED();
132 }
133 }
134
ProxyUriToProxyChain(std::string_view uri,ProxyServer::Scheme default_scheme,bool is_quic_allowed)135 ProxyChain ProxyUriToProxyChain(std::string_view uri,
136 ProxyServer::Scheme default_scheme,
137 bool is_quic_allowed) {
138 // If uri is direct, return direct proxy chain.
139 uri = HttpUtil::TrimLWS(uri);
140 size_t colon = uri.find("://");
141 if (colon != std::string_view::npos &&
142 base::EqualsCaseInsensitiveASCII(uri.substr(0, colon), "direct")) {
143 if (!uri.substr(colon + 3).empty()) {
144 return ProxyChain(); // Invalid -- Direct chain cannot have a host/port.
145 }
146 return ProxyChain::Direct();
147 }
148 return ProxyChain(
149 ProxyUriToProxyServer(uri, default_scheme, is_quic_allowed));
150 }
151
ProxyUriToProxyServer(std::string_view uri,ProxyServer::Scheme default_scheme,bool is_quic_allowed)152 ProxyServer ProxyUriToProxyServer(std::string_view uri,
153 ProxyServer::Scheme default_scheme,
154 bool is_quic_allowed) {
155 // We will default to |default_scheme| if no scheme specifier was given.
156 ProxyServer::Scheme scheme = default_scheme;
157
158 // Trim the leading/trailing whitespace.
159 uri = HttpUtil::TrimLWS(uri);
160
161 // Check for [<scheme> "://"]
162 size_t colon = uri.find(':');
163 if (colon != std::string_view::npos && uri.size() - colon >= 3 &&
164 uri[colon + 1] == '/' && uri[colon + 2] == '/') {
165 scheme = GetSchemeFromUriScheme(uri.substr(0, colon), is_quic_allowed);
166 uri = uri.substr(colon + 3); // Skip past the "://"
167 }
168
169 // Now parse the <host>[":"<port>].
170 return ProxySchemeHostAndPortToProxyServer(scheme, uri);
171 }
172
ProxyServerToProxyUri(const ProxyServer & proxy_server)173 std::string ProxyServerToProxyUri(const ProxyServer& proxy_server) {
174 switch (proxy_server.scheme()) {
175 case ProxyServer::SCHEME_HTTP:
176 // Leave off "http://" since it is our default scheme.
177 return ConstructHostPortString(proxy_server.GetHost(),
178 proxy_server.GetPort());
179 case ProxyServer::SCHEME_SOCKS4:
180 return std::string("socks4://") +
181 ConstructHostPortString(proxy_server.GetHost(),
182 proxy_server.GetPort());
183 case ProxyServer::SCHEME_SOCKS5:
184 return std::string("socks5://") +
185 ConstructHostPortString(proxy_server.GetHost(),
186 proxy_server.GetPort());
187 case ProxyServer::SCHEME_HTTPS:
188 return std::string("https://") +
189 ConstructHostPortString(proxy_server.GetHost(),
190 proxy_server.GetPort());
191 case ProxyServer::SCHEME_QUIC:
192 return std::string("quic://") +
193 ConstructHostPortString(proxy_server.GetHost(),
194 proxy_server.GetPort());
195 default:
196 // Got called with an invalid scheme.
197 NOTREACHED();
198 }
199 }
200
ProxySchemeHostAndPortToProxyServer(ProxyServer::Scheme scheme,std::string_view host_and_port)201 ProxyServer ProxySchemeHostAndPortToProxyServer(
202 ProxyServer::Scheme scheme,
203 std::string_view host_and_port) {
204 // Trim leading/trailing space.
205 host_and_port = HttpUtil::TrimLWS(host_and_port);
206
207 if (scheme == ProxyServer::SCHEME_INVALID) {
208 return ProxyServer();
209 }
210
211 url::Component username_component;
212 url::Component password_component;
213 url::Component hostname_component;
214 url::Component port_component;
215 url::ParseAuthority(host_and_port.data(),
216 url::Component(0, host_and_port.size()),
217 &username_component, &password_component,
218 &hostname_component, &port_component);
219 if (username_component.is_valid() || password_component.is_valid() ||
220 hostname_component.is_empty()) {
221 return ProxyServer();
222 }
223
224 std::string_view hostname =
225 host_and_port.substr(hostname_component.begin, hostname_component.len);
226
227 // Reject inputs like "foo:". /url parsing and canonicalization code generally
228 // allows it and treats it the same as a URL without a specified port, but
229 // Chrome has traditionally disallowed it in proxy specifications.
230 if (port_component.is_valid() && port_component.is_empty()) {
231 return ProxyServer();
232 }
233 std::string_view port =
234 port_component.is_nonempty()
235 ? host_and_port.substr(port_component.begin, port_component.len)
236 : "";
237
238 return ProxyServer::FromSchemeHostAndPort(scheme, hostname, port);
239 }
240
GetSchemeFromUriScheme(std::string_view scheme,bool is_quic_allowed)241 ProxyServer::Scheme GetSchemeFromUriScheme(std::string_view scheme,
242 bool is_quic_allowed) {
243 if (base::EqualsCaseInsensitiveASCII(scheme, "http")) {
244 return ProxyServer::SCHEME_HTTP;
245 }
246 if (base::EqualsCaseInsensitiveASCII(scheme, "socks4")) {
247 return ProxyServer::SCHEME_SOCKS4;
248 }
249 if (base::EqualsCaseInsensitiveASCII(scheme, "socks")) {
250 return ProxyServer::SCHEME_SOCKS5;
251 }
252 if (base::EqualsCaseInsensitiveASCII(scheme, "socks5")) {
253 return ProxyServer::SCHEME_SOCKS5;
254 }
255 if (base::EqualsCaseInsensitiveASCII(scheme, "https")) {
256 return ProxyServer::SCHEME_HTTPS;
257 }
258 #if BUILDFLAG(ENABLE_QUIC_PROXY_SUPPORT)
259 if (is_quic_allowed && base::EqualsCaseInsensitiveASCII(scheme, "quic")) {
260 return ProxyServer::SCHEME_QUIC;
261 }
262 #endif // BUILDFLAG(ENABLE_QUIC_PROXY_SUPPORT)
263 return ProxyServer::SCHEME_INVALID;
264 }
265
MultiProxyUrisToProxyChain(std::string_view uris,ProxyServer::Scheme default_scheme,bool is_quic_allowed)266 ProxyChain MultiProxyUrisToProxyChain(std::string_view uris,
267 ProxyServer::Scheme default_scheme,
268 bool is_quic_allowed) {
269 #if BUILDFLAG(ENABLE_BRACKETED_PROXY_URIS)
270 uris = HttpUtil::TrimLWS(uris);
271 if (uris.empty()) {
272 return ProxyChain();
273 }
274
275 bool has_multi_proxy_brackets = uris.front() == '[' && uris.back() == ']';
276 // Remove `[]` if present
277 if (has_multi_proxy_brackets) {
278 uris = HttpUtil::TrimLWS(uris.substr(1, uris.size() - 2));
279 }
280
281 std::vector<ProxyServer> proxy_server_list;
282 std::vector<std::string_view> uris_list = base::SplitStringPiece(
283 uris, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
284 size_t number_of_proxy_uris = uris_list.size();
285 bool has_invalid_format =
286 number_of_proxy_uris > 1 && !has_multi_proxy_brackets;
287
288 // If uris list is empty or has invalid formatting for multi-proxy chains, an
289 // invalid `ProxyChain` should be returned.
290 if (uris_list.empty() || has_invalid_format) {
291 return ProxyChain();
292 }
293
294 for (const auto& uri : uris_list) {
295 // If direct is found, it MUST be the only uri in the list. Otherwise, it is
296 // an invalid `ProxyChain()`.
297 if (base::EqualsCaseInsensitiveASCII(uri, "direct://")) {
298 return number_of_proxy_uris > 1 ? ProxyChain() : ProxyChain::Direct();
299 }
300
301 proxy_server_list.push_back(
302 ProxyUriToProxyServer(uri, default_scheme, is_quic_allowed));
303 }
304
305 return ProxyChain(std::move(proxy_server_list));
306 #else
307 // This function should not be called in non-debug modes.
308 NOTREACHED();
309 #endif // !BUILDFLAG(ENABLE_BRACKETED_PROXY_URIS)
310 }
311 } // namespace net
312