• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/socket/tcp_client_socket_win.h"
6 
7 #include <mstcpip.h>
8 
9 #include "base/basictypes.h"
10 #include "base/compiler_specific.h"
11 #include "base/memory/memory_debug.h"
12 #include "base/metrics/stats_counters.h"
13 #include "base/string_util.h"
14 #include "base/sys_info.h"
15 #include "base/win/object_watcher.h"
16 #include "net/base/address_list_net_log_param.h"
17 #include "net/base/connection_type_histograms.h"
18 #include "net/base/io_buffer.h"
19 #include "net/base/ip_endpoint.h"
20 #include "net/base/net_errors.h"
21 #include "net/base/net_log.h"
22 #include "net/base/net_util.h"
23 #include "net/base/network_change_notifier.h"
24 #include "net/base/sys_addrinfo.h"
25 #include "net/base/winsock_init.h"
26 #include "net/base/winsock_util.h"
27 
28 namespace net {
29 
30 namespace {
31 
MapConnectError(int os_error)32 int MapConnectError(int os_error) {
33   switch (os_error) {
34     // connect fails with WSAEACCES when Windows Firewall blocks the
35     // connection.
36     case WSAEACCES:
37       return ERR_NETWORK_ACCESS_DENIED;
38     case WSAETIMEDOUT:
39       return ERR_CONNECTION_TIMED_OUT;
40     default: {
41       int net_error = MapSystemError(os_error);
42       if (net_error == ERR_FAILED)
43         return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
44 
45       // Give a more specific error when the user is offline.
46       if (net_error == ERR_ADDRESS_UNREACHABLE &&
47           NetworkChangeNotifier::IsOffline()) {
48         return ERR_INTERNET_DISCONNECTED;
49       }
50 
51       return net_error;
52     }
53   }
54 }
55 
56 }  // namespace
57 
58 //-----------------------------------------------------------------------------
59 
60 // This class encapsulates all the state that has to be preserved as long as
61 // there is a network IO operation in progress. If the owner TCPClientSocketWin
62 // is destroyed while an operation is in progress, the Core is detached and it
63 // lives until the operation completes and the OS doesn't reference any resource
64 // declared on this class anymore.
65 class TCPClientSocketWin::Core : public base::RefCounted<Core> {
66  public:
67   explicit Core(TCPClientSocketWin* socket);
68 
69   // Start watching for the end of a read or write operation.
70   void WatchForRead();
71   void WatchForWrite();
72 
73   // The TCPClientSocketWin is going away.
Detach()74   void Detach() { socket_ = NULL; }
75 
76   // The separate OVERLAPPED variables for asynchronous operation.
77   // |read_overlapped_| is used for both Connect() and Read().
78   // |write_overlapped_| is only used for Write();
79   OVERLAPPED read_overlapped_;
80   OVERLAPPED write_overlapped_;
81 
82   // The buffers used in Read() and Write().
83   WSABUF read_buffer_;
84   WSABUF write_buffer_;
85   scoped_refptr<IOBuffer> read_iobuffer_;
86   scoped_refptr<IOBuffer> write_iobuffer_;
87   int write_buffer_length_;
88 
89   // Throttle the read size based on our current slow start state.
90   // Returns the throttled read size.
ThrottleReadSize(int size)91   int ThrottleReadSize(int size) {
92     if (slow_start_throttle_ < kMaxSlowStartThrottle) {
93       size = std::min(size, slow_start_throttle_);
94       slow_start_throttle_ *= 2;
95     }
96     return size;
97   }
98 
99  private:
100   friend class base::RefCounted<Core>;
101 
102   class ReadDelegate : public base::win::ObjectWatcher::Delegate {
103    public:
ReadDelegate(Core * core)104     explicit ReadDelegate(Core* core) : core_(core) {}
~ReadDelegate()105     virtual ~ReadDelegate() {}
106 
107     // base::ObjectWatcher::Delegate methods:
108     virtual void OnObjectSignaled(HANDLE object);
109 
110    private:
111     Core* const core_;
112   };
113 
114   class WriteDelegate : public base::win::ObjectWatcher::Delegate {
115    public:
WriteDelegate(Core * core)116     explicit WriteDelegate(Core* core) : core_(core) {}
~WriteDelegate()117     virtual ~WriteDelegate() {}
118 
119     // base::ObjectWatcher::Delegate methods:
120     virtual void OnObjectSignaled(HANDLE object);
121 
122    private:
123     Core* const core_;
124   };
125 
126   ~Core();
127 
128   // The socket that created this object.
129   TCPClientSocketWin* socket_;
130 
131   // |reader_| handles the signals from |read_watcher_|.
132   ReadDelegate reader_;
133   // |writer_| handles the signals from |write_watcher_|.
134   WriteDelegate writer_;
135 
136   // |read_watcher_| watches for events from Connect() and Read().
137   base::win::ObjectWatcher read_watcher_;
138   // |write_watcher_| watches for events from Write();
139   base::win::ObjectWatcher write_watcher_;
140 
141   // When doing reads from the socket, we try to mirror TCP's slow start.
142   // We do this because otherwise the async IO subsystem artifically delays
143   // returning data to the application.
144   static const int kInitialSlowStartThrottle = 1 * 1024;
145   static const int kMaxSlowStartThrottle = 32 * kInitialSlowStartThrottle;
146   int slow_start_throttle_;
147 
148   DISALLOW_COPY_AND_ASSIGN(Core);
149 };
150 
Core(TCPClientSocketWin * socket)151 TCPClientSocketWin::Core::Core(
152     TCPClientSocketWin* socket)
153     : write_buffer_length_(0),
154       socket_(socket),
155       ALLOW_THIS_IN_INITIALIZER_LIST(reader_(this)),
156       ALLOW_THIS_IN_INITIALIZER_LIST(writer_(this)),
157       slow_start_throttle_(kInitialSlowStartThrottle) {
158   memset(&read_overlapped_, 0, sizeof(read_overlapped_));
159   memset(&write_overlapped_, 0, sizeof(write_overlapped_));
160 }
161 
~Core()162 TCPClientSocketWin::Core::~Core() {
163   // Make sure the message loop is not watching this object anymore.
164   read_watcher_.StopWatching();
165   write_watcher_.StopWatching();
166 
167   WSACloseEvent(read_overlapped_.hEvent);
168   memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
169   WSACloseEvent(write_overlapped_.hEvent);
170   memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
171 }
172 
WatchForRead()173 void TCPClientSocketWin::Core::WatchForRead() {
174   // We grab an extra reference because there is an IO operation in progress.
175   // Balanced in ReadDelegate::OnObjectSignaled().
176   AddRef();
177   read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
178 }
179 
WatchForWrite()180 void TCPClientSocketWin::Core::WatchForWrite() {
181   // We grab an extra reference because there is an IO operation in progress.
182   // Balanced in WriteDelegate::OnObjectSignaled().
183   AddRef();
184   write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
185 }
186 
OnObjectSignaled(HANDLE object)187 void TCPClientSocketWin::Core::ReadDelegate::OnObjectSignaled(
188     HANDLE object) {
189   DCHECK_EQ(object, core_->read_overlapped_.hEvent);
190   if (core_->socket_) {
191     if (core_->socket_->waiting_connect()) {
192       core_->socket_->DidCompleteConnect();
193     } else {
194       core_->socket_->DidCompleteRead();
195     }
196   }
197 
198   core_->Release();
199 }
200 
OnObjectSignaled(HANDLE object)201 void TCPClientSocketWin::Core::WriteDelegate::OnObjectSignaled(
202     HANDLE object) {
203   DCHECK_EQ(object, core_->write_overlapped_.hEvent);
204   if (core_->socket_)
205     core_->socket_->DidCompleteWrite();
206 
207   core_->Release();
208 }
209 
210 //-----------------------------------------------------------------------------
211 
TCPClientSocketWin(const AddressList & addresses,net::NetLog * net_log,const net::NetLog::Source & source)212 TCPClientSocketWin::TCPClientSocketWin(const AddressList& addresses,
213                                        net::NetLog* net_log,
214                                        const net::NetLog::Source& source)
215     : socket_(INVALID_SOCKET),
216       addresses_(addresses),
217       current_ai_(NULL),
218       waiting_read_(false),
219       waiting_write_(false),
220       read_callback_(NULL),
221       write_callback_(NULL),
222       next_connect_state_(CONNECT_STATE_NONE),
223       connect_os_error_(0),
224       net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)),
225       previously_disconnected_(false) {
226   scoped_refptr<NetLog::EventParameters> params;
227   if (source.is_valid())
228     params = new NetLogSourceParameter("source_dependency", source);
229   net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, params);
230   EnsureWinsockInit();
231 }
232 
~TCPClientSocketWin()233 TCPClientSocketWin::~TCPClientSocketWin() {
234   Disconnect();
235   net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE, NULL);
236 }
237 
AdoptSocket(SOCKET socket)238 void TCPClientSocketWin::AdoptSocket(SOCKET socket) {
239   DCHECK_EQ(socket_, INVALID_SOCKET);
240   socket_ = socket;
241   int error = SetupSocket();
242   DCHECK_EQ(0, error);
243   core_ = new Core(this);
244   current_ai_ = addresses_.head();
245   use_history_.set_was_ever_connected();
246 }
247 
248 #ifdef ANDROID
249 // TODO(kristianm): handle the case when wait_for_connect is true
250 // (sync requests)
251 #endif
Connect(CompletionCallback * callback,bool wait_for_connect)252 int TCPClientSocketWin::Connect(CompletionCallback* callback
253 #ifdef ANDROID
254                                 , bool wait_for_connect
255 #endif
256                                ) {
257   DCHECK(CalledOnValidThread());
258 
259   // If already connected, then just return OK.
260   if (socket_ != INVALID_SOCKET)
261     return OK;
262 
263   base::StatsCounter connects("tcp.connect");
264   connects.Increment();
265 
266   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
267                       new AddressListNetLogParam(addresses_));
268 
269   // We will try to connect to each address in addresses_. Start with the
270   // first one in the list.
271   next_connect_state_ = CONNECT_STATE_CONNECT;
272   current_ai_ = addresses_.head();
273 
274   int rv = DoConnectLoop(OK);
275   if (rv == ERR_IO_PENDING) {
276     // Synchronous operation not supported.
277     DCHECK(callback);
278     read_callback_ = callback;
279   } else {
280     LogConnectCompletion(rv);
281   }
282 
283   return rv;
284 }
285 
DoConnectLoop(int result)286 int TCPClientSocketWin::DoConnectLoop(int result) {
287   DCHECK_NE(next_connect_state_, CONNECT_STATE_NONE);
288 
289   int rv = result;
290   do {
291     ConnectState state = next_connect_state_;
292     next_connect_state_ = CONNECT_STATE_NONE;
293     switch (state) {
294       case CONNECT_STATE_CONNECT:
295         DCHECK_EQ(OK, rv);
296         rv = DoConnect();
297         break;
298       case CONNECT_STATE_CONNECT_COMPLETE:
299         rv = DoConnectComplete(rv);
300         break;
301       default:
302         LOG(DFATAL) << "bad state " << state;
303         rv = ERR_UNEXPECTED;
304         break;
305     }
306   } while (rv != ERR_IO_PENDING && next_connect_state_ != CONNECT_STATE_NONE);
307 
308   return rv;
309 }
310 
DoConnect()311 int TCPClientSocketWin::DoConnect() {
312   const struct addrinfo* ai = current_ai_;
313   DCHECK(ai);
314   DCHECK_EQ(0, connect_os_error_);
315 
316   if (previously_disconnected_) {
317     use_history_.Reset();
318     previously_disconnected_ = false;
319   }
320 
321   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
322                       new NetLogStringParameter(
323                           "address", NetAddressToStringWithPort(current_ai_)));
324 
325   next_connect_state_ = CONNECT_STATE_CONNECT_COMPLETE;
326 
327   connect_os_error_ = CreateSocket(ai);
328   if (connect_os_error_ != 0)
329     return MapSystemError(connect_os_error_);
330 
331   DCHECK(!core_);
332   core_ = new Core(this);
333 
334   // WSACreateEvent creates a manual-reset event object.
335   core_->read_overlapped_.hEvent = WSACreateEvent();
336   // WSAEventSelect sets the socket to non-blocking mode as a side effect.
337   // Our connect() and recv() calls require that the socket be non-blocking.
338   WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
339 
340   core_->write_overlapped_.hEvent = WSACreateEvent();
341 
342   if (!connect(socket_, ai->ai_addr, static_cast<int>(ai->ai_addrlen))) {
343     // Connected without waiting!
344     //
345     // The MSDN page for connect says:
346     //   With a nonblocking socket, the connection attempt cannot be completed
347     //   immediately. In this case, connect will return SOCKET_ERROR, and
348     //   WSAGetLastError will return WSAEWOULDBLOCK.
349     // which implies that for a nonblocking socket, connect never returns 0.
350     // It's not documented whether the event object will be signaled or not
351     // if connect does return 0.  So the code below is essentially dead code
352     // and we don't know if it's correct.
353     NOTREACHED();
354 
355     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
356       return OK;
357   } else {
358     int os_error = WSAGetLastError();
359     if (os_error != WSAEWOULDBLOCK) {
360       LOG(ERROR) << "connect failed: " << os_error;
361       connect_os_error_ = os_error;
362       return MapConnectError(os_error);
363     }
364   }
365 
366   core_->WatchForRead();
367   return ERR_IO_PENDING;
368 }
369 
DoConnectComplete(int result)370 int TCPClientSocketWin::DoConnectComplete(int result) {
371   // Log the end of this attempt (and any OS error it threw).
372   int os_error = connect_os_error_;
373   connect_os_error_ = 0;
374   scoped_refptr<NetLog::EventParameters> params;
375   if (result != OK)
376     params = new NetLogIntegerParameter("os_error", os_error);
377   net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, params);
378 
379   if (result == OK) {
380     use_history_.set_was_ever_connected();
381     return OK;  // Done!
382   }
383 
384   // Close whatever partially connected socket we currently have.
385   DoDisconnect();
386 
387   // Try to fall back to the next address in the list.
388   if (current_ai_->ai_next) {
389     next_connect_state_ = CONNECT_STATE_CONNECT;
390     current_ai_ = current_ai_->ai_next;
391     return OK;
392   }
393 
394   // Otherwise there is nothing to fall back to, so give up.
395   return result;
396 }
397 
Disconnect()398 void TCPClientSocketWin::Disconnect() {
399   DoDisconnect();
400   current_ai_ = NULL;
401 }
402 
DoDisconnect()403 void TCPClientSocketWin::DoDisconnect() {
404   DCHECK(CalledOnValidThread());
405 
406   if (socket_ == INVALID_SOCKET)
407     return;
408 
409   // Note: don't use CancelIo to cancel pending IO because it doesn't work
410   // when there is a Winsock layered service provider.
411 
412   // In most socket implementations, closing a socket results in a graceful
413   // connection shutdown, but in Winsock we have to call shutdown explicitly.
414   // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
415   // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
416   shutdown(socket_, SD_SEND);
417 
418   // This cancels any pending IO.
419   closesocket(socket_);
420   socket_ = INVALID_SOCKET;
421 
422   if (waiting_connect()) {
423     // We closed the socket, so this notification will never come.
424     // From MSDN' WSAEventSelect documentation:
425     // "Closing a socket with closesocket also cancels the association and
426     // selection of network events specified in WSAEventSelect for the socket".
427     core_->Release();
428   }
429 
430   waiting_read_ = false;
431   waiting_write_ = false;
432 
433   core_->Detach();
434   core_ = NULL;
435 
436   previously_disconnected_ = true;
437 }
438 
IsConnected() const439 bool TCPClientSocketWin::IsConnected() const {
440   DCHECK(CalledOnValidThread());
441 
442   if (socket_ == INVALID_SOCKET || waiting_connect())
443     return false;
444 
445   // Check if connection is alive.
446   char c;
447   int rv = recv(socket_, &c, 1, MSG_PEEK);
448   if (rv == 0)
449     return false;
450   if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
451     return false;
452 
453   return true;
454 }
455 
IsConnectedAndIdle() const456 bool TCPClientSocketWin::IsConnectedAndIdle() const {
457   DCHECK(CalledOnValidThread());
458 
459   if (socket_ == INVALID_SOCKET || waiting_connect())
460     return false;
461 
462   // Check if connection is alive and we haven't received any data
463   // unexpectedly.
464   char c;
465   int rv = recv(socket_, &c, 1, MSG_PEEK);
466   if (rv >= 0)
467     return false;
468   if (WSAGetLastError() != WSAEWOULDBLOCK)
469     return false;
470 
471   return true;
472 }
473 
GetPeerAddress(AddressList * address) const474 int TCPClientSocketWin::GetPeerAddress(AddressList* address) const {
475   DCHECK(CalledOnValidThread());
476   DCHECK(address);
477   if (!IsConnected())
478     return ERR_SOCKET_NOT_CONNECTED;
479   address->Copy(current_ai_, false);
480   return OK;
481 }
482 
GetLocalAddress(IPEndPoint * address) const483 int TCPClientSocketWin::GetLocalAddress(IPEndPoint* address) const {
484   DCHECK(CalledOnValidThread());
485   DCHECK(address);
486   if (!IsConnected())
487     return ERR_SOCKET_NOT_CONNECTED;
488 
489   struct sockaddr_storage addr_storage;
490   socklen_t addr_len = sizeof(addr_storage);
491   struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
492   if (getsockname(socket_, addr, &addr_len))
493     return MapSystemError(WSAGetLastError());
494   if (!address->FromSockAddr(addr, addr_len))
495     return ERR_FAILED;
496   return OK;
497 }
498 
SetSubresourceSpeculation()499 void TCPClientSocketWin::SetSubresourceSpeculation() {
500   use_history_.set_subresource_speculation();
501 }
502 
SetOmniboxSpeculation()503 void TCPClientSocketWin::SetOmniboxSpeculation() {
504   use_history_.set_omnibox_speculation();
505 }
506 
WasEverUsed() const507 bool TCPClientSocketWin::WasEverUsed() const {
508   return use_history_.was_used_to_convey_data();
509 }
510 
UsingTCPFastOpen() const511 bool TCPClientSocketWin::UsingTCPFastOpen() const {
512   // Not supported on windows.
513   return false;
514 }
515 
Read(IOBuffer * buf,int buf_len,CompletionCallback * callback)516 int TCPClientSocketWin::Read(IOBuffer* buf,
517                              int buf_len,
518                              CompletionCallback* callback) {
519   DCHECK(CalledOnValidThread());
520   DCHECK_NE(socket_, INVALID_SOCKET);
521   DCHECK(!waiting_read_);
522   DCHECK(!read_callback_);
523   DCHECK(!core_->read_iobuffer_);
524 
525   buf_len = core_->ThrottleReadSize(buf_len);
526 
527   core_->read_buffer_.len = buf_len;
528   core_->read_buffer_.buf = buf->data();
529 
530   // TODO(wtc): Remove the assertion after enough testing.
531   AssertEventNotSignaled(core_->read_overlapped_.hEvent);
532   DWORD num, flags = 0;
533   int rv = WSARecv(socket_, &core_->read_buffer_, 1, &num, &flags,
534                    &core_->read_overlapped_, NULL);
535   if (rv == 0) {
536     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) {
537       // Because of how WSARecv fills memory when used asynchronously, Purify
538       // isn't able to detect that it's been initialized, so it scans for 0xcd
539       // in the buffer and reports UMRs (uninitialized memory reads) for those
540       // individual bytes. We override that in PURIFY builds to avoid the
541       // false error reports.
542       // See bug 5297.
543       base::MemoryDebug::MarkAsInitialized(core_->read_buffer_.buf, num);
544       base::StatsCounter read_bytes("tcp.read_bytes");
545       read_bytes.Add(num);
546       if (num > 0)
547         use_history_.set_was_used_to_convey_data();
548       LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num,
549                       core_->read_buffer_.buf);
550       return static_cast<int>(num);
551     }
552   } else {
553     int os_error = WSAGetLastError();
554     if (os_error != WSA_IO_PENDING)
555       return MapSystemError(os_error);
556   }
557   core_->WatchForRead();
558   waiting_read_ = true;
559   read_callback_ = callback;
560   core_->read_iobuffer_ = buf;
561   return ERR_IO_PENDING;
562 }
563 
Write(IOBuffer * buf,int buf_len,CompletionCallback * callback)564 int TCPClientSocketWin::Write(IOBuffer* buf,
565                               int buf_len,
566                               CompletionCallback* callback) {
567   DCHECK(CalledOnValidThread());
568   DCHECK_NE(socket_, INVALID_SOCKET);
569   DCHECK(!waiting_write_);
570   DCHECK(!write_callback_);
571   DCHECK_GT(buf_len, 0);
572   DCHECK(!core_->write_iobuffer_);
573 
574   base::StatsCounter writes("tcp.writes");
575   writes.Increment();
576 
577   core_->write_buffer_.len = buf_len;
578   core_->write_buffer_.buf = buf->data();
579   core_->write_buffer_length_ = buf_len;
580 
581   // TODO(wtc): Remove the assertion after enough testing.
582   AssertEventNotSignaled(core_->write_overlapped_.hEvent);
583   DWORD num;
584   int rv = WSASend(socket_, &core_->write_buffer_, 1, &num, 0,
585                    &core_->write_overlapped_, NULL);
586   if (rv == 0) {
587     if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
588       rv = static_cast<int>(num);
589       if (rv > buf_len || rv < 0) {
590         // It seems that some winsock interceptors report that more was written
591         // than was available. Treat this as an error.  http://crbug.com/27870
592         LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
593                    << " bytes, but " << rv << " bytes reported.";
594         return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
595       }
596       base::StatsCounter write_bytes("tcp.write_bytes");
597       write_bytes.Add(rv);
598       if (rv > 0)
599         use_history_.set_was_used_to_convey_data();
600       LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, rv,
601                       core_->write_buffer_.buf);
602       return rv;
603     }
604   } else {
605     int os_error = WSAGetLastError();
606     if (os_error != WSA_IO_PENDING)
607       return MapSystemError(os_error);
608   }
609   core_->WatchForWrite();
610   waiting_write_ = true;
611   write_callback_ = callback;
612   core_->write_iobuffer_ = buf;
613   return ERR_IO_PENDING;
614 }
615 
SetReceiveBufferSize(int32 size)616 bool TCPClientSocketWin::SetReceiveBufferSize(int32 size) {
617   DCHECK(CalledOnValidThread());
618   int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF,
619                       reinterpret_cast<const char*>(&size), sizeof(size));
620   DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError();
621   return rv == 0;
622 }
623 
SetSendBufferSize(int32 size)624 bool TCPClientSocketWin::SetSendBufferSize(int32 size) {
625   DCHECK(CalledOnValidThread());
626   int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF,
627                       reinterpret_cast<const char*>(&size), sizeof(size));
628   DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError();
629   return rv == 0;
630 }
631 
CreateSocket(const struct addrinfo * ai)632 int TCPClientSocketWin::CreateSocket(const struct addrinfo* ai) {
633   socket_ = WSASocket(ai->ai_family, ai->ai_socktype, ai->ai_protocol, NULL, 0,
634                       WSA_FLAG_OVERLAPPED);
635   if (socket_ == INVALID_SOCKET) {
636     int os_error = WSAGetLastError();
637     LOG(ERROR) << "WSASocket failed: " << os_error;
638     return os_error;
639   }
640   return SetupSocket();
641 }
642 
SetupSocket()643 int TCPClientSocketWin::SetupSocket() {
644   // Increase the socket buffer sizes from the default sizes for WinXP.  In
645   // performance testing, there is substantial benefit by increasing from 8KB
646   // to 64KB.
647   // See also:
648   //    http://support.microsoft.com/kb/823764/EN-US
649   // On Vista, if we manually set these sizes, Vista turns off its receive
650   // window auto-tuning feature.
651   //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
652   // Since Vista's auto-tune is better than any static value we can could set,
653   // only change these on pre-vista machines.
654   int32 major_version, minor_version, fix_version;
655   base::SysInfo::OperatingSystemVersionNumbers(&major_version, &minor_version,
656     &fix_version);
657   if (major_version < 6) {
658     const int32 kSocketBufferSize = 64 * 1024;
659     SetReceiveBufferSize(kSocketBufferSize);
660     SetSendBufferSize(kSocketBufferSize);
661   }
662 
663   // Disable Nagle.
664   // The Nagle implementation on windows is governed by RFC 896.  The idea
665   // behind Nagle is to reduce small packets on the network.  When Nagle is
666   // enabled, if a partial packet has been sent, the TCP stack will disallow
667   // further *partial* packets until an ACK has been received from the other
668   // side.  Good applications should always strive to send as much data as
669   // possible and avoid partial-packet sends.  However, in most real world
670   // applications, there are edge cases where this does not happen, and two
671   // partil packets may be sent back to back.  For a browser, it is NEVER
672   // a benefit to delay for an RTT before the second packet is sent.
673   //
674   // As a practical example in Chromium today, consider the case of a small
675   // POST.  I have verified this:
676   //     Client writes 649 bytes of header  (partial packet #1)
677   //     Client writes 50 bytes of POST data (partial packet #2)
678   // In the above example, with Nagle, a RTT delay is inserted between these
679   // two sends due to nagle.  RTTs can easily be 100ms or more.  The best
680   // fix is to make sure that for POSTing data, we write as much data as
681   // possible and minimize partial packets.  We will fix that.  But disabling
682   // Nagle also ensure we don't run into this delay in other edge cases.
683   // See also:
684   //    http://technet.microsoft.com/en-us/library/bb726981.aspx
685   const BOOL kDisableNagle = TRUE;
686   int rv = setsockopt(socket_, IPPROTO_TCP, TCP_NODELAY,
687                       reinterpret_cast<const char*>(&kDisableNagle),
688                       sizeof(kDisableNagle));
689   DCHECK(!rv) << "Could not disable nagle";
690 
691   // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
692   // connections. See http://crbug.com/27400 for details.
693 
694   struct tcp_keepalive keepalive_vals = {
695     1, // TCP keep-alive on.
696     45000,  // Wait 45s until sending first TCP keep-alive packet.
697     45000,  // Wait 45s between sending TCP keep-alive packets.
698   };
699   DWORD bytes_returned = 0xABAB;
700   rv = WSAIoctl(socket_, SIO_KEEPALIVE_VALS, &keepalive_vals,
701                 sizeof(keepalive_vals), NULL, 0,
702                 &bytes_returned, NULL, NULL);
703   DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket_
704               << " [error: " << WSAGetLastError() << "].";
705 
706   // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
707   return 0;
708 }
709 
LogConnectCompletion(int net_error)710 void TCPClientSocketWin::LogConnectCompletion(int net_error) {
711   if (net_error == OK)
712     UpdateConnectionTypeHistograms(CONNECTION_ANY);
713 
714   if (net_error != OK) {
715     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
716     return;
717   }
718 
719   struct sockaddr_storage source_address;
720   socklen_t addrlen = sizeof(source_address);
721   int rv = getsockname(
722       socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
723   if (rv != 0) {
724     LOG(ERROR) << "getsockname() [rv: " << rv
725                << "] error: " << WSAGetLastError();
726     NOTREACHED();
727     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
728     return;
729   }
730 
731   const std::string source_address_str =
732       NetAddressToStringWithPort(
733           reinterpret_cast<const struct sockaddr*>(&source_address),
734           sizeof(source_address));
735   net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT,
736                     make_scoped_refptr(new NetLogStringParameter(
737                         "source address",
738                         source_address_str)));
739 }
740 
DoReadCallback(int rv)741 void TCPClientSocketWin::DoReadCallback(int rv) {
742   DCHECK_NE(rv, ERR_IO_PENDING);
743   DCHECK(read_callback_);
744 
745   // since Run may result in Read being called, clear read_callback_ up front.
746   CompletionCallback* c = read_callback_;
747   read_callback_ = NULL;
748   c->Run(rv);
749 }
750 
DoWriteCallback(int rv)751 void TCPClientSocketWin::DoWriteCallback(int rv) {
752   DCHECK_NE(rv, ERR_IO_PENDING);
753   DCHECK(write_callback_);
754 
755   // since Run may result in Write being called, clear write_callback_ up front.
756   CompletionCallback* c = write_callback_;
757   write_callback_ = NULL;
758   c->Run(rv);
759 }
760 
DidCompleteConnect()761 void TCPClientSocketWin::DidCompleteConnect() {
762   DCHECK_EQ(next_connect_state_, CONNECT_STATE_CONNECT_COMPLETE);
763   int result;
764 
765   WSANETWORKEVENTS events;
766   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
767                                 &events);
768   int os_error = 0;
769   if (rv == SOCKET_ERROR) {
770     NOTREACHED();
771     os_error = WSAGetLastError();
772     result = MapSystemError(os_error);
773   } else if (events.lNetworkEvents & FD_CONNECT) {
774     os_error = events.iErrorCode[FD_CONNECT_BIT];
775     result = MapConnectError(os_error);
776   } else {
777     NOTREACHED();
778     result = ERR_UNEXPECTED;
779   }
780 
781   connect_os_error_ = os_error;
782   rv = DoConnectLoop(result);
783   if (rv != ERR_IO_PENDING) {
784     LogConnectCompletion(rv);
785     DoReadCallback(rv);
786   }
787 }
788 
DidCompleteRead()789 void TCPClientSocketWin::DidCompleteRead() {
790   DCHECK(waiting_read_);
791   DWORD num_bytes, flags;
792   BOOL ok = WSAGetOverlappedResult(socket_, &core_->read_overlapped_,
793                                    &num_bytes, FALSE, &flags);
794   WSAResetEvent(core_->read_overlapped_.hEvent);
795   waiting_read_ = false;
796   core_->read_iobuffer_ = NULL;
797   if (ok) {
798     base::StatsCounter read_bytes("tcp.read_bytes");
799     read_bytes.Add(num_bytes);
800     if (num_bytes > 0)
801       use_history_.set_was_used_to_convey_data();
802     LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num_bytes,
803                     core_->read_buffer_.buf);
804   }
805   DoReadCallback(ok ? num_bytes : MapSystemError(WSAGetLastError()));
806 }
807 
DidCompleteWrite()808 void TCPClientSocketWin::DidCompleteWrite() {
809   DCHECK(waiting_write_);
810 
811   DWORD num_bytes, flags;
812   BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
813                                    &num_bytes, FALSE, &flags);
814   WSAResetEvent(core_->write_overlapped_.hEvent);
815   waiting_write_ = false;
816   int rv;
817   if (!ok) {
818     rv = MapSystemError(WSAGetLastError());
819   } else {
820     rv = static_cast<int>(num_bytes);
821     if (rv > core_->write_buffer_length_ || rv < 0) {
822       // It seems that some winsock interceptors report that more was written
823       // than was available. Treat this as an error.  http://crbug.com/27870
824       LOG(ERROR) << "Detected broken LSP: Asked to write "
825                  << core_->write_buffer_length_ << " bytes, but " << rv
826                  << " bytes reported.";
827       rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
828     } else {
829       base::StatsCounter write_bytes("tcp.write_bytes");
830       write_bytes.Add(num_bytes);
831       if (num_bytes > 0)
832         use_history_.set_was_used_to_convey_data();
833       LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
834                       core_->write_buffer_.buf);
835     }
836   }
837   core_->write_iobuffer_ = NULL;
838   DoWriteCallback(rv);
839 }
840 
841 }  // namespace net
842