1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/proxy/proxy_script_fetcher_impl.h"
6
7 #include "base/compiler_specific.h"
8 #include "base/logging.h"
9 #include "base/message_loop/message_loop.h"
10 #include "base/strings/string_util.h"
11 #include "net/base/data_url.h"
12 #include "net/base/io_buffer.h"
13 #include "net/base/load_flags.h"
14 #include "net/base/net_errors.h"
15 #include "net/base/net_string_util.h"
16 #include "net/base/request_priority.h"
17 #include "net/cert/cert_status_flags.h"
18 #include "net/http/http_response_headers.h"
19 #include "net/url_request/url_request_context.h"
20
21 // TODO(eroman):
22 // - Support auth-prompts (http://crbug.com/77366)
23
24 namespace net {
25
26 namespace {
27
// Upper bound, in bytes, on the size of a PAC script. A response larger than
// this makes the fetch fail with ERR_FILE_TOO_BIG.
const int kDefaultMaxResponseBytes = 1024 * 1024;  // 1 MiB

// Upper bound, in milliseconds, on how long a PAC script fetch may take. A
// fetch running longer than this fails with ERR_TIMED_OUT.
const int kDefaultMaxDurationMs = 5 * 60 * 1000;  // 5 minutes
35
36 // Returns true if |mime_type| is one of the known PAC mime type.
IsPacMimeType(const std::string & mime_type)37 bool IsPacMimeType(const std::string& mime_type) {
38 static const char * const kSupportedPacMimeTypes[] = {
39 "application/x-ns-proxy-autoconfig",
40 "application/x-javascript-config",
41 };
42 for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
43 if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
44 return true;
45 }
46 return false;
47 }
48
49 // Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
50 // to |*utf16|.
51 // If |charset| is empty, then we don't know what it was and guess.
ConvertResponseToUTF16(const std::string & charset,const std::string & bytes,base::string16 * utf16)52 void ConvertResponseToUTF16(const std::string& charset,
53 const std::string& bytes,
54 base::string16* utf16) {
55 const char* codepage;
56
57 if (charset.empty()) {
58 // Assume ISO-8859-1 if no charset was specified.
59 codepage = kCharsetLatin1;
60 } else {
61 // Otherwise trust the charset that was provided.
62 codepage = charset.c_str();
63 }
64
65 // Be generous in the conversion -- if any characters lie outside of |charset|
66 // (i.e. invalid), then substitute them with U+FFFD rather than failing.
67 ConvertToUTF16WithSubstitutions(bytes, codepage, utf16);
68 }
69
70 } // namespace
71
ProxyScriptFetcherImpl(URLRequestContext * url_request_context)72 ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
73 URLRequestContext* url_request_context)
74 : url_request_context_(url_request_context),
75 buf_(new IOBuffer(kBufSize)),
76 next_id_(0),
77 cur_request_id_(0),
78 result_code_(OK),
79 result_text_(NULL),
80 max_response_bytes_(kDefaultMaxResponseBytes),
81 max_duration_(base::TimeDelta::FromMilliseconds(kDefaultMaxDurationMs)),
82 weak_factory_(this) {
83 DCHECK(url_request_context);
84 }
85
~ProxyScriptFetcherImpl()86 ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
87 // The URLRequest's destructor will cancel the outstanding request, and
88 // ensure that the delegate (this) is not called again.
89 }
90
SetTimeoutConstraint(base::TimeDelta timeout)91 base::TimeDelta ProxyScriptFetcherImpl::SetTimeoutConstraint(
92 base::TimeDelta timeout) {
93 base::TimeDelta prev = max_duration_;
94 max_duration_ = timeout;
95 return prev;
96 }
97
SetSizeConstraint(size_t size_bytes)98 size_t ProxyScriptFetcherImpl::SetSizeConstraint(size_t size_bytes) {
99 size_t prev = max_response_bytes_;
100 max_response_bytes_ = size_bytes;
101 return prev;
102 }
103
OnResponseCompleted(URLRequest * request)104 void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
105 DCHECK_EQ(request, cur_request_.get());
106
107 // Use |result_code_| as the request's error if we have already set it to
108 // something specific.
109 if (result_code_ == OK && !request->status().is_success())
110 result_code_ = request->status().error();
111
112 FetchCompleted();
113 }
114
Fetch(const GURL & url,base::string16 * text,const CompletionCallback & callback)115 int ProxyScriptFetcherImpl::Fetch(
116 const GURL& url, base::string16* text, const CompletionCallback& callback) {
117 // It is invalid to call Fetch() while a request is already in progress.
118 DCHECK(!cur_request_.get());
119 DCHECK(!callback.is_null());
120 DCHECK(text);
121
122 // Handle base-64 encoded data-urls that contain custom PAC scripts.
123 if (url.SchemeIs("data")) {
124 std::string mime_type;
125 std::string charset;
126 std::string data;
127 if (!DataURL::Parse(url, &mime_type, &charset, &data))
128 return ERR_FAILED;
129
130 ConvertResponseToUTF16(charset, data, text);
131 return OK;
132 }
133
134 cur_request_ =
135 url_request_context_->CreateRequest(url, DEFAULT_PRIORITY, this, NULL);
136 cur_request_->set_method("GET");
137
138 // Make sure that the PAC script is downloaded using a direct connection,
139 // to avoid circular dependencies (fetching is a part of proxy resolution).
140 // Also disable the use of the disk cache. The cache is disabled so that if
141 // the user switches networks we don't potentially use the cached response
142 // from old network when we should in fact be re-fetching on the new network.
143 // If the PAC script is hosted on an HTTPS server we bypass revocation
144 // checking in order to avoid a circular dependency when attempting to fetch
145 // the OCSP response or CRL. We could make the revocation check go direct but
146 // the proxy might be the only way to the outside world.
147 cur_request_->SetLoadFlags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
148 LOAD_DISABLE_CERT_REVOCATION_CHECKING);
149
150 // Save the caller's info for notification on completion.
151 callback_ = callback;
152 result_text_ = text;
153
154 bytes_read_so_far_.clear();
155
156 // Post a task to timeout this request if it takes too long.
157 cur_request_id_ = ++next_id_;
158 base::MessageLoop::current()->PostDelayedTask(
159 FROM_HERE,
160 base::Bind(&ProxyScriptFetcherImpl::OnTimeout,
161 weak_factory_.GetWeakPtr(),
162 cur_request_id_),
163 max_duration_);
164
165 // Start the request.
166 cur_request_->Start();
167 return ERR_IO_PENDING;
168 }
169
Cancel()170 void ProxyScriptFetcherImpl::Cancel() {
171 // ResetCurRequestState will free the URLRequest, which will cause
172 // cancellation.
173 ResetCurRequestState();
174 }
175
GetRequestContext() const176 URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() const {
177 return url_request_context_;
178 }
179
OnAuthRequired(URLRequest * request,AuthChallengeInfo * auth_info)180 void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
181 AuthChallengeInfo* auth_info) {
182 DCHECK_EQ(request, cur_request_.get());
183 // TODO(eroman): http://crbug.com/77366
184 LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
185 result_code_ = ERR_NOT_IMPLEMENTED;
186 request->CancelAuth();
187 }
188
OnSSLCertificateError(URLRequest * request,const SSLInfo & ssl_info,bool fatal)189 void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
190 const SSLInfo& ssl_info,
191 bool fatal) {
192 DCHECK_EQ(request, cur_request_.get());
193 // Revocation check failures are not fatal.
194 if (IsCertStatusMinorError(ssl_info.cert_status)) {
195 request->ContinueDespiteLastError();
196 return;
197 }
198 LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
199 // Certificate errors are in same space as net errors.
200 result_code_ = MapCertStatusToNetError(ssl_info.cert_status);
201 request->Cancel();
202 }
203
OnResponseStarted(URLRequest * request)204 void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
205 DCHECK_EQ(request, cur_request_.get());
206
207 if (!request->status().is_success()) {
208 OnResponseCompleted(request);
209 return;
210 }
211
212 // Require HTTP responses to have a success status code.
213 if (request->url().SchemeIsHTTPOrHTTPS()) {
214 // NOTE about status codes: We are like Firefox 3 in this respect.
215 // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
216 if (request->GetResponseCode() != 200) {
217 VLOG(1) << "Fetched PAC script had (bad) status line: "
218 << request->response_headers()->GetStatusLine();
219 result_code_ = ERR_PAC_STATUS_NOT_OK;
220 request->Cancel();
221 return;
222 }
223
224 // NOTE about mime types: We do not enforce mime types on PAC files.
225 // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
226 // however log mismatches to help with debugging.
227 std::string mime_type;
228 cur_request_->GetMimeType(&mime_type);
229 if (!IsPacMimeType(mime_type)) {
230 VLOG(1) << "Fetched PAC script does not have a proper mime type: "
231 << mime_type;
232 }
233 }
234
235 ReadBody(request);
236 }
237
OnReadCompleted(URLRequest * request,int num_bytes)238 void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
239 int num_bytes) {
240 DCHECK_EQ(request, cur_request_.get());
241 if (ConsumeBytesRead(request, num_bytes)) {
242 // Keep reading.
243 ReadBody(request);
244 }
245 }
246
ReadBody(URLRequest * request)247 void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
248 // Read as many bytes as are available synchronously.
249 while (true) {
250 int num_bytes;
251 if (!request->Read(buf_.get(), kBufSize, &num_bytes)) {
252 // Check whether the read failed synchronously.
253 if (!request->status().is_io_pending())
254 OnResponseCompleted(request);
255 return;
256 }
257 if (!ConsumeBytesRead(request, num_bytes))
258 return;
259 }
260 }
261
ConsumeBytesRead(URLRequest * request,int num_bytes)262 bool ProxyScriptFetcherImpl::ConsumeBytesRead(URLRequest* request,
263 int num_bytes) {
264 if (num_bytes <= 0) {
265 // Error while reading, or EOF.
266 OnResponseCompleted(request);
267 return false;
268 }
269
270 // Enforce maximum size bound.
271 if (num_bytes + bytes_read_so_far_.size() >
272 static_cast<size_t>(max_response_bytes_)) {
273 result_code_ = ERR_FILE_TOO_BIG;
274 request->Cancel();
275 return false;
276 }
277
278 bytes_read_so_far_.append(buf_->data(), num_bytes);
279 return true;
280 }
281
FetchCompleted()282 void ProxyScriptFetcherImpl::FetchCompleted() {
283 if (result_code_ == OK) {
284 // The caller expects the response to be encoded as UTF16.
285 std::string charset;
286 cur_request_->GetCharset(&charset);
287 ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
288 } else {
289 // On error, the caller expects empty string for bytes.
290 result_text_->clear();
291 }
292
293 int result_code = result_code_;
294 CompletionCallback callback = callback_;
295
296 ResetCurRequestState();
297
298 callback.Run(result_code);
299 }
300
ResetCurRequestState()301 void ProxyScriptFetcherImpl::ResetCurRequestState() {
302 cur_request_.reset();
303 cur_request_id_ = 0;
304 callback_.Reset();
305 result_code_ = OK;
306 result_text_ = NULL;
307 }
308
OnTimeout(int id)309 void ProxyScriptFetcherImpl::OnTimeout(int id) {
310 // Timeout tasks may outlive the URLRequest they reference. Make sure it
311 // is still applicable.
312 if (cur_request_id_ != id)
313 return;
314
315 DCHECK(cur_request_.get());
316 result_code_ = ERR_TIMED_OUT;
317 cur_request_->Cancel();
318 }
319
320 } // namespace net
321