• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/proxy_resolution/pac_file_fetcher_impl.h"
6 
7 #include "base/compiler_specific.h"
8 #include "base/functional/bind.h"
9 #include "base/location.h"
10 #include "base/logging.h"
11 #include "base/memory/ptr_util.h"
12 #include "base/metrics/histogram_macros.h"
13 #include "base/ranges/algorithm.h"
14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_util.h"
16 #include "base/task/single_thread_task_runner.h"
17 #include "net/base/data_url.h"
18 #include "net/base/io_buffer.h"
19 #include "net/base/load_flags.h"
20 #include "net/base/net_errors.h"
21 #include "net/base/net_string_util.h"
22 #include "net/base/request_priority.h"
23 #include "net/cert/cert_status_flags.h"
24 #include "net/http/http_response_headers.h"
25 #include "net/url_request/redirect_info.h"
26 #include "net/url_request/url_request_context.h"
27 
28 // TODO(eroman):
29 //   - Support auth-prompts (http://crbug.com/77366)
30 
31 namespace net {
32 
33 namespace {
34 
// Upper bound, in bytes, on the size of a downloaded PAC script. A response
// that grows past this limit fails with ERR_FILE_TOO_BIG.
constexpr int kDefaultMaxResponseBytes = 1024 * 1024;  // 1 megabyte
38 
39 // The maximum duration (in milliseconds) allowed for fetching the PAC script.
40 // Responses exceeding this will fail with ERR_TIMED_OUT.
41 //
42 // This timeout applies to both scripts fetched in the course of WPAD, as well
43 // as explicitly configured ones.
44 //
45 // If the default timeout is too high, auto-detect can stall for a long time,
46 // and if it is too low then slow loading scripts may be skipped.
47 //
48 // 30 seconds is a compromise between those competing goals. This value also
49 // appears to match Microsoft Edge (based on testing).
50 constexpr base::TimeDelta kDefaultMaxDuration = base::Seconds(30);
51 
52 // Returns true if |mime_type| is one of the known PAC mime type.
IsPacMimeType(base::StringPiece mime_type)53 constexpr bool IsPacMimeType(base::StringPiece mime_type) {
54   constexpr base::StringPiece kSupportedPacMimeTypes[] = {
55       "application/x-ns-proxy-autoconfig",
56       "application/x-javascript-config",
57   };
58   return base::ranges::any_of(kSupportedPacMimeTypes, [&](auto pac_mime_type) {
59     return base::EqualsCaseInsensitiveASCII(pac_mime_type, mime_type);
60   });
61 }
62 
63 struct BomMapping {
64   base::StringPiece prefix;
65   const char* charset;
66 };
67 
68 const BomMapping kBomMappings[] = {
69     {"\xFE\xFF", "utf-16be"},
70     {"\xFF\xFE", "utf-16le"},
71     {"\xEF\xBB\xBF", "utf-8"},
72 };
73 
74 // Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
75 // to |*utf16|.
76 // If |charset| is empty, then we don't know what it was and guess.
ConvertResponseToUTF16(const std::string & charset,const std::string & bytes,std::u16string * utf16)77 void ConvertResponseToUTF16(const std::string& charset,
78                             const std::string& bytes,
79                             std::u16string* utf16) {
80   if (charset.empty()) {
81     // Guess the charset by looking at the BOM.
82     base::StringPiece bytes_str(bytes);
83     for (const auto& bom : kBomMappings) {
84       if (base::StartsWith(bytes_str, bom.prefix)) {
85         return ConvertResponseToUTF16(
86             bom.charset,
87             // Strip the BOM in the converted response.
88             bytes.substr(bom.prefix.size()), utf16);
89       }
90     }
91 
92     // Otherwise assume ISO-8859-1 if no charset was specified.
93     return ConvertResponseToUTF16(kCharsetLatin1, bytes, utf16);
94   }
95 
96   DCHECK(!charset.empty());
97 
98   // Be generous in the conversion -- if any characters lie outside of |charset|
99   // (i.e. invalid), then substitute them with U+FFFD rather than failing.
100   ConvertToUTF16WithSubstitutions(bytes, charset.c_str(), utf16);
101 }
102 
103 }  // namespace
104 
Create(URLRequestContext * url_request_context)105 std::unique_ptr<PacFileFetcherImpl> PacFileFetcherImpl::Create(
106     URLRequestContext* url_request_context) {
107   return base::WrapUnique(new PacFileFetcherImpl(url_request_context));
108 }
109 
~PacFileFetcherImpl()110 PacFileFetcherImpl::~PacFileFetcherImpl() {
111   // The URLRequest's destructor will cancel the outstanding request, and
112   // ensure that the delegate (this) is not called again.
113 }
114 
SetTimeoutConstraint(base::TimeDelta timeout)115 base::TimeDelta PacFileFetcherImpl::SetTimeoutConstraint(
116     base::TimeDelta timeout) {
117   base::TimeDelta prev = max_duration_;
118   max_duration_ = timeout;
119   return prev;
120 }
121 
SetSizeConstraint(size_t size_bytes)122 size_t PacFileFetcherImpl::SetSizeConstraint(size_t size_bytes) {
123   size_t prev = max_response_bytes_;
124   max_response_bytes_ = size_bytes;
125   return prev;
126 }
127 
OnResponseCompleted(URLRequest * request,int net_error)128 void PacFileFetcherImpl::OnResponseCompleted(URLRequest* request,
129                                              int net_error) {
130   DCHECK_EQ(request, cur_request_.get());
131 
132   // Use |result_code_| as the request's error if we have already set it to
133   // something specific.
134   if (result_code_ == OK && net_error != OK)
135     result_code_ = net_error;
136 
137   FetchCompleted();
138 }
139 
Fetch(const GURL & url,std::u16string * text,CompletionOnceCallback callback,const NetworkTrafficAnnotationTag traffic_annotation)140 int PacFileFetcherImpl::Fetch(
141     const GURL& url,
142     std::u16string* text,
143     CompletionOnceCallback callback,
144     const NetworkTrafficAnnotationTag traffic_annotation) {
145   // It is invalid to call Fetch() while a request is already in progress.
146   DCHECK(!cur_request_.get());
147   DCHECK(!callback.is_null());
148   DCHECK(text);
149 
150   if (!url_request_context_)
151     return ERR_CONTEXT_SHUT_DOWN;
152 
153   if (!IsUrlSchemeAllowed(url))
154     return ERR_DISALLOWED_URL_SCHEME;
155 
156   // Handle base-64 encoded data-urls that contain custom PAC scripts.
157   if (url.SchemeIs("data")) {
158     std::string mime_type;
159     std::string charset;
160     std::string data;
161     if (!DataURL::Parse(url, &mime_type, &charset, &data))
162       return ERR_FAILED;
163 
164     ConvertResponseToUTF16(charset, data, text);
165     return OK;
166   }
167 
168   DCHECK(fetch_start_time_.is_null());
169   fetch_start_time_ = base::TimeTicks::Now();
170 
171   // Use highest priority, so if socket pools are being used for other types of
172   // requests, PAC requests are aren't blocked on them.
173   cur_request_ = url_request_context_->CreateRequest(url, MAXIMUM_PRIORITY,
174                                                      this, traffic_annotation);
175 
176   cur_request_->set_isolation_info(isolation_info());
177 
178   // Make sure that the PAC script is downloaded using a direct connection,
179   // to avoid circular dependencies (fetching is a part of proxy resolution).
180   // Also disable the use of the disk cache. The cache is disabled so that if
181   // the user switches networks we don't potentially use the cached response
182   // from old network when we should in fact be re-fetching on the new network.
183   // If the PAC script is hosted on an HTTPS server we bypass revocation
184   // checking in order to avoid a circular dependency when attempting to fetch
185   // the OCSP response or CRL. We could make the revocation check go direct but
186   // the proxy might be the only way to the outside world.  IGNORE_LIMITS is
187   // used to avoid blocking proxy resolution on other network requests.
188   cur_request_->SetLoadFlags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
189                              LOAD_DISABLE_CERT_NETWORK_FETCHES |
190                              LOAD_IGNORE_LIMITS);
191 
192   // Save the caller's info for notification on completion.
193   callback_ = std::move(callback);
194   result_text_ = text;
195 
196   bytes_read_so_far_.clear();
197 
198   // Post a task to timeout this request if it takes too long.
199   cur_request_id_ = ++next_id_;
200 
201   base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
202       FROM_HERE,
203       base::BindOnce(&PacFileFetcherImpl::OnTimeout, weak_factory_.GetWeakPtr(),
204                      cur_request_id_),
205       max_duration_);
206 
207   // Start the request.
208   cur_request_->Start();
209   return ERR_IO_PENDING;
210 }
211 
Cancel()212 void PacFileFetcherImpl::Cancel() {
213   // ResetCurRequestState will free the URLRequest, which will cause
214   // cancellation.
215   ResetCurRequestState();
216 }
217 
GetRequestContext() const218 URLRequestContext* PacFileFetcherImpl::GetRequestContext() const {
219   return url_request_context_;
220 }
221 
OnShutdown()222 void PacFileFetcherImpl::OnShutdown() {
223   url_request_context_ = nullptr;
224 
225   if (cur_request_) {
226     result_code_ = ERR_CONTEXT_SHUT_DOWN;
227     FetchCompleted();
228   }
229 }
230 
OnReceivedRedirect(URLRequest * request,const RedirectInfo & redirect_info,bool * defer_redirect)231 void PacFileFetcherImpl::OnReceivedRedirect(URLRequest* request,
232                                             const RedirectInfo& redirect_info,
233                                             bool* defer_redirect) {
234   int error = OK;
235 
236   // Redirection to file:// is never OK. Ordinarily this is handled lower in the
237   // stack (|FileProtocolHandler::IsSafeRedirectTarget|), but this is reachable
238   // when built without file:// suppport. Return the same error for consistency.
239   if (redirect_info.new_url.SchemeIsFile()) {
240     error = ERR_UNSAFE_REDIRECT;
241   } else if (!IsUrlSchemeAllowed(redirect_info.new_url)) {
242     error = ERR_DISALLOWED_URL_SCHEME;
243   }
244 
245   if (error != OK) {
246     // Fail the redirect.
247     request->CancelWithError(error);
248     OnResponseCompleted(request, error);
249   }
250 }
251 
OnAuthRequired(URLRequest * request,const AuthChallengeInfo & auth_info)252 void PacFileFetcherImpl::OnAuthRequired(URLRequest* request,
253                                         const AuthChallengeInfo& auth_info) {
254   DCHECK_EQ(request, cur_request_.get());
255   // TODO(eroman): http://crbug.com/77366
256   LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
257   result_code_ = ERR_NOT_IMPLEMENTED;
258   request->CancelAuth();
259 }
260 
OnSSLCertificateError(URLRequest * request,int net_error,const SSLInfo & ssl_info,bool fatal)261 void PacFileFetcherImpl::OnSSLCertificateError(URLRequest* request,
262                                                int net_error,
263                                                const SSLInfo& ssl_info,
264                                                bool fatal) {
265   DCHECK_EQ(request, cur_request_.get());
266   LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
267   // Certificate errors are in same space as net errors.
268   result_code_ = net_error;
269   request->Cancel();
270 }
271 
OnResponseStarted(URLRequest * request,int net_error)272 void PacFileFetcherImpl::OnResponseStarted(URLRequest* request, int net_error) {
273   DCHECK_EQ(request, cur_request_.get());
274   DCHECK_NE(ERR_IO_PENDING, net_error);
275 
276   if (net_error != OK) {
277     OnResponseCompleted(request, net_error);
278     return;
279   }
280 
281   // Require HTTP responses to have a success status code.
282   if (request->url().SchemeIsHTTPOrHTTPS()) {
283     // NOTE about status codes: We are like Firefox 3 in this respect.
284     // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
285     if (request->GetResponseCode() != 200) {
286       VLOG(1) << "Fetched PAC script had (bad) status line: "
287               << request->response_headers()->GetStatusLine();
288       result_code_ = ERR_HTTP_RESPONSE_CODE_FAILURE;
289       request->Cancel();
290       return;
291     }
292 
293     // NOTE about mime types: We do not enforce mime types on PAC files.
294     // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
295     // however log mismatches to help with debugging.
296     std::string mime_type;
297     cur_request_->GetMimeType(&mime_type);
298     if (!IsPacMimeType(mime_type)) {
299       VLOG(1) << "Fetched PAC script does not have a proper mime type: "
300               << mime_type;
301     }
302   }
303 
304   ReadBody(request);
305 }
306 
OnReadCompleted(URLRequest * request,int num_bytes)307 void PacFileFetcherImpl::OnReadCompleted(URLRequest* request, int num_bytes) {
308   DCHECK_NE(ERR_IO_PENDING, num_bytes);
309 
310   DCHECK_EQ(request, cur_request_.get());
311   if (ConsumeBytesRead(request, num_bytes)) {
312     // Keep reading.
313     ReadBody(request);
314   }
315 }
316 
PacFileFetcherImpl(URLRequestContext * url_request_context)317 PacFileFetcherImpl::PacFileFetcherImpl(URLRequestContext* url_request_context)
318     : url_request_context_(url_request_context),
319       buf_(base::MakeRefCounted<IOBuffer>(kBufSize)),
320       max_response_bytes_(kDefaultMaxResponseBytes),
321       max_duration_(kDefaultMaxDuration) {
322   DCHECK(url_request_context);
323 }
324 
IsUrlSchemeAllowed(const GURL & url) const325 bool PacFileFetcherImpl::IsUrlSchemeAllowed(const GURL& url) const {
326   // Always allow http://, https://, and data:.
327   if (url.SchemeIsHTTPOrHTTPS() || url.SchemeIs("data"))
328     return true;
329 
330   // Disallow any other URL scheme.
331   return false;
332 }
333 
ReadBody(URLRequest * request)334 void PacFileFetcherImpl::ReadBody(URLRequest* request) {
335   // Read as many bytes as are available synchronously.
336   while (true) {
337     int num_bytes = request->Read(buf_.get(), kBufSize);
338     if (num_bytes == ERR_IO_PENDING)
339       return;
340 
341     if (num_bytes < 0) {
342       OnResponseCompleted(request, num_bytes);
343       return;
344     }
345 
346     if (!ConsumeBytesRead(request, num_bytes))
347       return;
348   }
349 }
350 
ConsumeBytesRead(URLRequest * request,int num_bytes)351 bool PacFileFetcherImpl::ConsumeBytesRead(URLRequest* request, int num_bytes) {
352   if (fetch_time_to_first_byte_.is_null())
353     fetch_time_to_first_byte_ = base::TimeTicks::Now();
354 
355   if (num_bytes <= 0) {
356     // Error while reading, or EOF.
357     OnResponseCompleted(request, num_bytes);
358     return false;
359   }
360 
361   // Enforce maximum size bound.
362   if (num_bytes + bytes_read_so_far_.size() >
363       static_cast<size_t>(max_response_bytes_)) {
364     result_code_ = ERR_FILE_TOO_BIG;
365     request->Cancel();
366     return false;
367   }
368 
369   bytes_read_so_far_.append(buf_->data(), num_bytes);
370   return true;
371 }
372 
FetchCompleted()373 void PacFileFetcherImpl::FetchCompleted() {
374   if (result_code_ == OK) {
375     // Calculate duration of time for PAC file fetch to complete.
376     DCHECK(!fetch_start_time_.is_null());
377     DCHECK(!fetch_time_to_first_byte_.is_null());
378     UMA_HISTOGRAM_MEDIUM_TIMES("Net.ProxyScriptFetcher.SuccessDuration",
379                                base::TimeTicks::Now() - fetch_start_time_);
380     UMA_HISTOGRAM_MEDIUM_TIMES("Net.ProxyScriptFetcher.FirstByteDuration",
381                                fetch_time_to_first_byte_ - fetch_start_time_);
382 
383     // The caller expects the response to be encoded as UTF16.
384     std::string charset;
385     cur_request_->GetCharset(&charset);
386     ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
387   } else {
388     // On error, the caller expects empty string for bytes.
389     result_text_->clear();
390   }
391 
392   int result_code = result_code_;
393   CompletionOnceCallback callback = std::move(callback_);
394 
395   ResetCurRequestState();
396 
397   std::move(callback).Run(result_code);
398 }
399 
ResetCurRequestState()400 void PacFileFetcherImpl::ResetCurRequestState() {
401   cur_request_.reset();
402   cur_request_id_ = 0;
403   callback_.Reset();
404   result_code_ = OK;
405   result_text_ = nullptr;
406   fetch_start_time_ = base::TimeTicks();
407   fetch_time_to_first_byte_ = base::TimeTicks();
408 }
409 
OnTimeout(int id)410 void PacFileFetcherImpl::OnTimeout(int id) {
411   // Timeout tasks may outlive the URLRequest they reference. Make sure it
412   // is still applicable.
413   if (cur_request_id_ != id)
414     return;
415 
416   DCHECK(cur_request_.get());
417   result_code_ = ERR_TIMED_OUT;
418   FetchCompleted();
419 }
420 
421 }  // namespace net
422