1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/socket/tcp_client_socket_win.h"
6
7 #include <mstcpip.h>
8
9 #include "base/basictypes.h"
10 #include "base/compiler_specific.h"
11 #include "base/memory/memory_debug.h"
12 #include "base/metrics/stats_counters.h"
13 #include "base/string_util.h"
14 #include "base/sys_info.h"
15 #include "base/win/object_watcher.h"
16 #include "net/base/address_list_net_log_param.h"
17 #include "net/base/connection_type_histograms.h"
18 #include "net/base/io_buffer.h"
19 #include "net/base/ip_endpoint.h"
20 #include "net/base/net_errors.h"
21 #include "net/base/net_log.h"
22 #include "net/base/net_util.h"
23 #include "net/base/network_change_notifier.h"
24 #include "net/base/sys_addrinfo.h"
25 #include "net/base/winsock_init.h"
26 #include "net/base/winsock_util.h"
27
28 namespace net {
29
30 namespace {
31
MapConnectError(int os_error)32 int MapConnectError(int os_error) {
33 switch (os_error) {
34 // connect fails with WSAEACCES when Windows Firewall blocks the
35 // connection.
36 case WSAEACCES:
37 return ERR_NETWORK_ACCESS_DENIED;
38 case WSAETIMEDOUT:
39 return ERR_CONNECTION_TIMED_OUT;
40 default: {
41 int net_error = MapSystemError(os_error);
42 if (net_error == ERR_FAILED)
43 return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED.
44
45 // Give a more specific error when the user is offline.
46 if (net_error == ERR_ADDRESS_UNREACHABLE &&
47 NetworkChangeNotifier::IsOffline()) {
48 return ERR_INTERNET_DISCONNECTED;
49 }
50
51 return net_error;
52 }
53 }
54 }
55
56 } // namespace
57
58 //-----------------------------------------------------------------------------
59
60 // This class encapsulates all the state that has to be preserved as long as
61 // there is a network IO operation in progress. If the owner TCPClientSocketWin
62 // is destroyed while an operation is in progress, the Core is detached and it
63 // lives until the operation completes and the OS doesn't reference any resource
64 // declared on this class anymore.
65 class TCPClientSocketWin::Core : public base::RefCounted<Core> {
66 public:
67 explicit Core(TCPClientSocketWin* socket);
68
69 // Start watching for the end of a read or write operation.
70 void WatchForRead();
71 void WatchForWrite();
72
73 // The TCPClientSocketWin is going away.
Detach()74 void Detach() { socket_ = NULL; }
75
76 // The separate OVERLAPPED variables for asynchronous operation.
77 // |read_overlapped_| is used for both Connect() and Read().
78 // |write_overlapped_| is only used for Write();
79 OVERLAPPED read_overlapped_;
80 OVERLAPPED write_overlapped_;
81
82 // The buffers used in Read() and Write().
83 WSABUF read_buffer_;
84 WSABUF write_buffer_;
85 scoped_refptr<IOBuffer> read_iobuffer_;
86 scoped_refptr<IOBuffer> write_iobuffer_;
87 int write_buffer_length_;
88
89 // Throttle the read size based on our current slow start state.
90 // Returns the throttled read size.
ThrottleReadSize(int size)91 int ThrottleReadSize(int size) {
92 if (slow_start_throttle_ < kMaxSlowStartThrottle) {
93 size = std::min(size, slow_start_throttle_);
94 slow_start_throttle_ *= 2;
95 }
96 return size;
97 }
98
99 private:
100 friend class base::RefCounted<Core>;
101
102 class ReadDelegate : public base::win::ObjectWatcher::Delegate {
103 public:
ReadDelegate(Core * core)104 explicit ReadDelegate(Core* core) : core_(core) {}
~ReadDelegate()105 virtual ~ReadDelegate() {}
106
107 // base::ObjectWatcher::Delegate methods:
108 virtual void OnObjectSignaled(HANDLE object);
109
110 private:
111 Core* const core_;
112 };
113
114 class WriteDelegate : public base::win::ObjectWatcher::Delegate {
115 public:
WriteDelegate(Core * core)116 explicit WriteDelegate(Core* core) : core_(core) {}
~WriteDelegate()117 virtual ~WriteDelegate() {}
118
119 // base::ObjectWatcher::Delegate methods:
120 virtual void OnObjectSignaled(HANDLE object);
121
122 private:
123 Core* const core_;
124 };
125
126 ~Core();
127
128 // The socket that created this object.
129 TCPClientSocketWin* socket_;
130
131 // |reader_| handles the signals from |read_watcher_|.
132 ReadDelegate reader_;
133 // |writer_| handles the signals from |write_watcher_|.
134 WriteDelegate writer_;
135
136 // |read_watcher_| watches for events from Connect() and Read().
137 base::win::ObjectWatcher read_watcher_;
138 // |write_watcher_| watches for events from Write();
139 base::win::ObjectWatcher write_watcher_;
140
141 // When doing reads from the socket, we try to mirror TCP's slow start.
142 // We do this because otherwise the async IO subsystem artifically delays
143 // returning data to the application.
144 static const int kInitialSlowStartThrottle = 1 * 1024;
145 static const int kMaxSlowStartThrottle = 32 * kInitialSlowStartThrottle;
146 int slow_start_throttle_;
147
148 DISALLOW_COPY_AND_ASSIGN(Core);
149 };
150
Core(TCPClientSocketWin * socket)151 TCPClientSocketWin::Core::Core(
152 TCPClientSocketWin* socket)
153 : write_buffer_length_(0),
154 socket_(socket),
155 ALLOW_THIS_IN_INITIALIZER_LIST(reader_(this)),
156 ALLOW_THIS_IN_INITIALIZER_LIST(writer_(this)),
157 slow_start_throttle_(kInitialSlowStartThrottle) {
158 memset(&read_overlapped_, 0, sizeof(read_overlapped_));
159 memset(&write_overlapped_, 0, sizeof(write_overlapped_));
160 }
161
~Core()162 TCPClientSocketWin::Core::~Core() {
163 // Make sure the message loop is not watching this object anymore.
164 read_watcher_.StopWatching();
165 write_watcher_.StopWatching();
166
167 WSACloseEvent(read_overlapped_.hEvent);
168 memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
169 WSACloseEvent(write_overlapped_.hEvent);
170 memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
171 }
172
WatchForRead()173 void TCPClientSocketWin::Core::WatchForRead() {
174 // We grab an extra reference because there is an IO operation in progress.
175 // Balanced in ReadDelegate::OnObjectSignaled().
176 AddRef();
177 read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
178 }
179
WatchForWrite()180 void TCPClientSocketWin::Core::WatchForWrite() {
181 // We grab an extra reference because there is an IO operation in progress.
182 // Balanced in WriteDelegate::OnObjectSignaled().
183 AddRef();
184 write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
185 }
186
OnObjectSignaled(HANDLE object)187 void TCPClientSocketWin::Core::ReadDelegate::OnObjectSignaled(
188 HANDLE object) {
189 DCHECK_EQ(object, core_->read_overlapped_.hEvent);
190 if (core_->socket_) {
191 if (core_->socket_->waiting_connect()) {
192 core_->socket_->DidCompleteConnect();
193 } else {
194 core_->socket_->DidCompleteRead();
195 }
196 }
197
198 core_->Release();
199 }
200
OnObjectSignaled(HANDLE object)201 void TCPClientSocketWin::Core::WriteDelegate::OnObjectSignaled(
202 HANDLE object) {
203 DCHECK_EQ(object, core_->write_overlapped_.hEvent);
204 if (core_->socket_)
205 core_->socket_->DidCompleteWrite();
206
207 core_->Release();
208 }
209
210 //-----------------------------------------------------------------------------
211
TCPClientSocketWin(const AddressList & addresses,net::NetLog * net_log,const net::NetLog::Source & source)212 TCPClientSocketWin::TCPClientSocketWin(const AddressList& addresses,
213 net::NetLog* net_log,
214 const net::NetLog::Source& source)
215 : socket_(INVALID_SOCKET),
216 addresses_(addresses),
217 current_ai_(NULL),
218 waiting_read_(false),
219 waiting_write_(false),
220 read_callback_(NULL),
221 write_callback_(NULL),
222 next_connect_state_(CONNECT_STATE_NONE),
223 connect_os_error_(0),
224 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)),
225 previously_disconnected_(false) {
226 scoped_refptr<NetLog::EventParameters> params;
227 if (source.is_valid())
228 params = new NetLogSourceParameter("source_dependency", source);
229 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, params);
230 EnsureWinsockInit();
231 }
232
~TCPClientSocketWin()233 TCPClientSocketWin::~TCPClientSocketWin() {
234 Disconnect();
235 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE, NULL);
236 }
237
AdoptSocket(SOCKET socket)238 void TCPClientSocketWin::AdoptSocket(SOCKET socket) {
239 DCHECK_EQ(socket_, INVALID_SOCKET);
240 socket_ = socket;
241 int error = SetupSocket();
242 DCHECK_EQ(0, error);
243 core_ = new Core(this);
244 current_ai_ = addresses_.head();
245 use_history_.set_was_ever_connected();
246 }
247
248 #ifdef ANDROID
249 // TODO(kristianm): handle the case when wait_for_connect is true
250 // (sync requests)
251 #endif
Connect(CompletionCallback * callback,bool wait_for_connect)252 int TCPClientSocketWin::Connect(CompletionCallback* callback
253 #ifdef ANDROID
254 , bool wait_for_connect
255 #endif
256 ) {
257 DCHECK(CalledOnValidThread());
258
259 // If already connected, then just return OK.
260 if (socket_ != INVALID_SOCKET)
261 return OK;
262
263 base::StatsCounter connects("tcp.connect");
264 connects.Increment();
265
266 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
267 new AddressListNetLogParam(addresses_));
268
269 // We will try to connect to each address in addresses_. Start with the
270 // first one in the list.
271 next_connect_state_ = CONNECT_STATE_CONNECT;
272 current_ai_ = addresses_.head();
273
274 int rv = DoConnectLoop(OK);
275 if (rv == ERR_IO_PENDING) {
276 // Synchronous operation not supported.
277 DCHECK(callback);
278 read_callback_ = callback;
279 } else {
280 LogConnectCompletion(rv);
281 }
282
283 return rv;
284 }
285
DoConnectLoop(int result)286 int TCPClientSocketWin::DoConnectLoop(int result) {
287 DCHECK_NE(next_connect_state_, CONNECT_STATE_NONE);
288
289 int rv = result;
290 do {
291 ConnectState state = next_connect_state_;
292 next_connect_state_ = CONNECT_STATE_NONE;
293 switch (state) {
294 case CONNECT_STATE_CONNECT:
295 DCHECK_EQ(OK, rv);
296 rv = DoConnect();
297 break;
298 case CONNECT_STATE_CONNECT_COMPLETE:
299 rv = DoConnectComplete(rv);
300 break;
301 default:
302 LOG(DFATAL) << "bad state " << state;
303 rv = ERR_UNEXPECTED;
304 break;
305 }
306 } while (rv != ERR_IO_PENDING && next_connect_state_ != CONNECT_STATE_NONE);
307
308 return rv;
309 }
310
DoConnect()311 int TCPClientSocketWin::DoConnect() {
312 const struct addrinfo* ai = current_ai_;
313 DCHECK(ai);
314 DCHECK_EQ(0, connect_os_error_);
315
316 if (previously_disconnected_) {
317 use_history_.Reset();
318 previously_disconnected_ = false;
319 }
320
321 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
322 new NetLogStringParameter(
323 "address", NetAddressToStringWithPort(current_ai_)));
324
325 next_connect_state_ = CONNECT_STATE_CONNECT_COMPLETE;
326
327 connect_os_error_ = CreateSocket(ai);
328 if (connect_os_error_ != 0)
329 return MapSystemError(connect_os_error_);
330
331 DCHECK(!core_);
332 core_ = new Core(this);
333
334 // WSACreateEvent creates a manual-reset event object.
335 core_->read_overlapped_.hEvent = WSACreateEvent();
336 // WSAEventSelect sets the socket to non-blocking mode as a side effect.
337 // Our connect() and recv() calls require that the socket be non-blocking.
338 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
339
340 core_->write_overlapped_.hEvent = WSACreateEvent();
341
342 if (!connect(socket_, ai->ai_addr, static_cast<int>(ai->ai_addrlen))) {
343 // Connected without waiting!
344 //
345 // The MSDN page for connect says:
346 // With a nonblocking socket, the connection attempt cannot be completed
347 // immediately. In this case, connect will return SOCKET_ERROR, and
348 // WSAGetLastError will return WSAEWOULDBLOCK.
349 // which implies that for a nonblocking socket, connect never returns 0.
350 // It's not documented whether the event object will be signaled or not
351 // if connect does return 0. So the code below is essentially dead code
352 // and we don't know if it's correct.
353 NOTREACHED();
354
355 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
356 return OK;
357 } else {
358 int os_error = WSAGetLastError();
359 if (os_error != WSAEWOULDBLOCK) {
360 LOG(ERROR) << "connect failed: " << os_error;
361 connect_os_error_ = os_error;
362 return MapConnectError(os_error);
363 }
364 }
365
366 core_->WatchForRead();
367 return ERR_IO_PENDING;
368 }
369
DoConnectComplete(int result)370 int TCPClientSocketWin::DoConnectComplete(int result) {
371 // Log the end of this attempt (and any OS error it threw).
372 int os_error = connect_os_error_;
373 connect_os_error_ = 0;
374 scoped_refptr<NetLog::EventParameters> params;
375 if (result != OK)
376 params = new NetLogIntegerParameter("os_error", os_error);
377 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, params);
378
379 if (result == OK) {
380 use_history_.set_was_ever_connected();
381 return OK; // Done!
382 }
383
384 // Close whatever partially connected socket we currently have.
385 DoDisconnect();
386
387 // Try to fall back to the next address in the list.
388 if (current_ai_->ai_next) {
389 next_connect_state_ = CONNECT_STATE_CONNECT;
390 current_ai_ = current_ai_->ai_next;
391 return OK;
392 }
393
394 // Otherwise there is nothing to fall back to, so give up.
395 return result;
396 }
397
Disconnect()398 void TCPClientSocketWin::Disconnect() {
399 DoDisconnect();
400 current_ai_ = NULL;
401 }
402
DoDisconnect()403 void TCPClientSocketWin::DoDisconnect() {
404 DCHECK(CalledOnValidThread());
405
406 if (socket_ == INVALID_SOCKET)
407 return;
408
409 // Note: don't use CancelIo to cancel pending IO because it doesn't work
410 // when there is a Winsock layered service provider.
411
412 // In most socket implementations, closing a socket results in a graceful
413 // connection shutdown, but in Winsock we have to call shutdown explicitly.
414 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
415 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
416 shutdown(socket_, SD_SEND);
417
418 // This cancels any pending IO.
419 closesocket(socket_);
420 socket_ = INVALID_SOCKET;
421
422 if (waiting_connect()) {
423 // We closed the socket, so this notification will never come.
424 // From MSDN' WSAEventSelect documentation:
425 // "Closing a socket with closesocket also cancels the association and
426 // selection of network events specified in WSAEventSelect for the socket".
427 core_->Release();
428 }
429
430 waiting_read_ = false;
431 waiting_write_ = false;
432
433 core_->Detach();
434 core_ = NULL;
435
436 previously_disconnected_ = true;
437 }
438
IsConnected() const439 bool TCPClientSocketWin::IsConnected() const {
440 DCHECK(CalledOnValidThread());
441
442 if (socket_ == INVALID_SOCKET || waiting_connect())
443 return false;
444
445 // Check if connection is alive.
446 char c;
447 int rv = recv(socket_, &c, 1, MSG_PEEK);
448 if (rv == 0)
449 return false;
450 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
451 return false;
452
453 return true;
454 }
455
IsConnectedAndIdle() const456 bool TCPClientSocketWin::IsConnectedAndIdle() const {
457 DCHECK(CalledOnValidThread());
458
459 if (socket_ == INVALID_SOCKET || waiting_connect())
460 return false;
461
462 // Check if connection is alive and we haven't received any data
463 // unexpectedly.
464 char c;
465 int rv = recv(socket_, &c, 1, MSG_PEEK);
466 if (rv >= 0)
467 return false;
468 if (WSAGetLastError() != WSAEWOULDBLOCK)
469 return false;
470
471 return true;
472 }
473
GetPeerAddress(AddressList * address) const474 int TCPClientSocketWin::GetPeerAddress(AddressList* address) const {
475 DCHECK(CalledOnValidThread());
476 DCHECK(address);
477 if (!IsConnected())
478 return ERR_SOCKET_NOT_CONNECTED;
479 address->Copy(current_ai_, false);
480 return OK;
481 }
482
GetLocalAddress(IPEndPoint * address) const483 int TCPClientSocketWin::GetLocalAddress(IPEndPoint* address) const {
484 DCHECK(CalledOnValidThread());
485 DCHECK(address);
486 if (!IsConnected())
487 return ERR_SOCKET_NOT_CONNECTED;
488
489 struct sockaddr_storage addr_storage;
490 socklen_t addr_len = sizeof(addr_storage);
491 struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
492 if (getsockname(socket_, addr, &addr_len))
493 return MapSystemError(WSAGetLastError());
494 if (!address->FromSockAddr(addr, addr_len))
495 return ERR_FAILED;
496 return OK;
497 }
498
SetSubresourceSpeculation()499 void TCPClientSocketWin::SetSubresourceSpeculation() {
500 use_history_.set_subresource_speculation();
501 }
502
SetOmniboxSpeculation()503 void TCPClientSocketWin::SetOmniboxSpeculation() {
504 use_history_.set_omnibox_speculation();
505 }
506
WasEverUsed() const507 bool TCPClientSocketWin::WasEverUsed() const {
508 return use_history_.was_used_to_convey_data();
509 }
510
UsingTCPFastOpen() const511 bool TCPClientSocketWin::UsingTCPFastOpen() const {
512 // Not supported on windows.
513 return false;
514 }
515
Read(IOBuffer * buf,int buf_len,CompletionCallback * callback)516 int TCPClientSocketWin::Read(IOBuffer* buf,
517 int buf_len,
518 CompletionCallback* callback) {
519 DCHECK(CalledOnValidThread());
520 DCHECK_NE(socket_, INVALID_SOCKET);
521 DCHECK(!waiting_read_);
522 DCHECK(!read_callback_);
523 DCHECK(!core_->read_iobuffer_);
524
525 buf_len = core_->ThrottleReadSize(buf_len);
526
527 core_->read_buffer_.len = buf_len;
528 core_->read_buffer_.buf = buf->data();
529
530 // TODO(wtc): Remove the assertion after enough testing.
531 AssertEventNotSignaled(core_->read_overlapped_.hEvent);
532 DWORD num, flags = 0;
533 int rv = WSARecv(socket_, &core_->read_buffer_, 1, &num, &flags,
534 &core_->read_overlapped_, NULL);
535 if (rv == 0) {
536 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) {
537 // Because of how WSARecv fills memory when used asynchronously, Purify
538 // isn't able to detect that it's been initialized, so it scans for 0xcd
539 // in the buffer and reports UMRs (uninitialized memory reads) for those
540 // individual bytes. We override that in PURIFY builds to avoid the
541 // false error reports.
542 // See bug 5297.
543 base::MemoryDebug::MarkAsInitialized(core_->read_buffer_.buf, num);
544 base::StatsCounter read_bytes("tcp.read_bytes");
545 read_bytes.Add(num);
546 if (num > 0)
547 use_history_.set_was_used_to_convey_data();
548 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num,
549 core_->read_buffer_.buf);
550 return static_cast<int>(num);
551 }
552 } else {
553 int os_error = WSAGetLastError();
554 if (os_error != WSA_IO_PENDING)
555 return MapSystemError(os_error);
556 }
557 core_->WatchForRead();
558 waiting_read_ = true;
559 read_callback_ = callback;
560 core_->read_iobuffer_ = buf;
561 return ERR_IO_PENDING;
562 }
563
Write(IOBuffer * buf,int buf_len,CompletionCallback * callback)564 int TCPClientSocketWin::Write(IOBuffer* buf,
565 int buf_len,
566 CompletionCallback* callback) {
567 DCHECK(CalledOnValidThread());
568 DCHECK_NE(socket_, INVALID_SOCKET);
569 DCHECK(!waiting_write_);
570 DCHECK(!write_callback_);
571 DCHECK_GT(buf_len, 0);
572 DCHECK(!core_->write_iobuffer_);
573
574 base::StatsCounter writes("tcp.writes");
575 writes.Increment();
576
577 core_->write_buffer_.len = buf_len;
578 core_->write_buffer_.buf = buf->data();
579 core_->write_buffer_length_ = buf_len;
580
581 // TODO(wtc): Remove the assertion after enough testing.
582 AssertEventNotSignaled(core_->write_overlapped_.hEvent);
583 DWORD num;
584 int rv = WSASend(socket_, &core_->write_buffer_, 1, &num, 0,
585 &core_->write_overlapped_, NULL);
586 if (rv == 0) {
587 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
588 rv = static_cast<int>(num);
589 if (rv > buf_len || rv < 0) {
590 // It seems that some winsock interceptors report that more was written
591 // than was available. Treat this as an error. http://crbug.com/27870
592 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
593 << " bytes, but " << rv << " bytes reported.";
594 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
595 }
596 base::StatsCounter write_bytes("tcp.write_bytes");
597 write_bytes.Add(rv);
598 if (rv > 0)
599 use_history_.set_was_used_to_convey_data();
600 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, rv,
601 core_->write_buffer_.buf);
602 return rv;
603 }
604 } else {
605 int os_error = WSAGetLastError();
606 if (os_error != WSA_IO_PENDING)
607 return MapSystemError(os_error);
608 }
609 core_->WatchForWrite();
610 waiting_write_ = true;
611 write_callback_ = callback;
612 core_->write_iobuffer_ = buf;
613 return ERR_IO_PENDING;
614 }
615
SetReceiveBufferSize(int32 size)616 bool TCPClientSocketWin::SetReceiveBufferSize(int32 size) {
617 DCHECK(CalledOnValidThread());
618 int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF,
619 reinterpret_cast<const char*>(&size), sizeof(size));
620 DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError();
621 return rv == 0;
622 }
623
SetSendBufferSize(int32 size)624 bool TCPClientSocketWin::SetSendBufferSize(int32 size) {
625 DCHECK(CalledOnValidThread());
626 int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF,
627 reinterpret_cast<const char*>(&size), sizeof(size));
628 DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError();
629 return rv == 0;
630 }
631
CreateSocket(const struct addrinfo * ai)632 int TCPClientSocketWin::CreateSocket(const struct addrinfo* ai) {
633 socket_ = WSASocket(ai->ai_family, ai->ai_socktype, ai->ai_protocol, NULL, 0,
634 WSA_FLAG_OVERLAPPED);
635 if (socket_ == INVALID_SOCKET) {
636 int os_error = WSAGetLastError();
637 LOG(ERROR) << "WSASocket failed: " << os_error;
638 return os_error;
639 }
640 return SetupSocket();
641 }
642
SetupSocket()643 int TCPClientSocketWin::SetupSocket() {
644 // Increase the socket buffer sizes from the default sizes for WinXP. In
645 // performance testing, there is substantial benefit by increasing from 8KB
646 // to 64KB.
647 // See also:
648 // http://support.microsoft.com/kb/823764/EN-US
649 // On Vista, if we manually set these sizes, Vista turns off its receive
650 // window auto-tuning feature.
651 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
652 // Since Vista's auto-tune is better than any static value we can could set,
653 // only change these on pre-vista machines.
654 int32 major_version, minor_version, fix_version;
655 base::SysInfo::OperatingSystemVersionNumbers(&major_version, &minor_version,
656 &fix_version);
657 if (major_version < 6) {
658 const int32 kSocketBufferSize = 64 * 1024;
659 SetReceiveBufferSize(kSocketBufferSize);
660 SetSendBufferSize(kSocketBufferSize);
661 }
662
663 // Disable Nagle.
664 // The Nagle implementation on windows is governed by RFC 896. The idea
665 // behind Nagle is to reduce small packets on the network. When Nagle is
666 // enabled, if a partial packet has been sent, the TCP stack will disallow
667 // further *partial* packets until an ACK has been received from the other
668 // side. Good applications should always strive to send as much data as
669 // possible and avoid partial-packet sends. However, in most real world
670 // applications, there are edge cases where this does not happen, and two
671 // partil packets may be sent back to back. For a browser, it is NEVER
672 // a benefit to delay for an RTT before the second packet is sent.
673 //
674 // As a practical example in Chromium today, consider the case of a small
675 // POST. I have verified this:
676 // Client writes 649 bytes of header (partial packet #1)
677 // Client writes 50 bytes of POST data (partial packet #2)
678 // In the above example, with Nagle, a RTT delay is inserted between these
679 // two sends due to nagle. RTTs can easily be 100ms or more. The best
680 // fix is to make sure that for POSTing data, we write as much data as
681 // possible and minimize partial packets. We will fix that. But disabling
682 // Nagle also ensure we don't run into this delay in other edge cases.
683 // See also:
684 // http://technet.microsoft.com/en-us/library/bb726981.aspx
685 const BOOL kDisableNagle = TRUE;
686 int rv = setsockopt(socket_, IPPROTO_TCP, TCP_NODELAY,
687 reinterpret_cast<const char*>(&kDisableNagle),
688 sizeof(kDisableNagle));
689 DCHECK(!rv) << "Could not disable nagle";
690
691 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
692 // connections. See http://crbug.com/27400 for details.
693
694 struct tcp_keepalive keepalive_vals = {
695 1, // TCP keep-alive on.
696 45000, // Wait 45s until sending first TCP keep-alive packet.
697 45000, // Wait 45s between sending TCP keep-alive packets.
698 };
699 DWORD bytes_returned = 0xABAB;
700 rv = WSAIoctl(socket_, SIO_KEEPALIVE_VALS, &keepalive_vals,
701 sizeof(keepalive_vals), NULL, 0,
702 &bytes_returned, NULL, NULL);
703 DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket_
704 << " [error: " << WSAGetLastError() << "].";
705
706 // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
707 return 0;
708 }
709
LogConnectCompletion(int net_error)710 void TCPClientSocketWin::LogConnectCompletion(int net_error) {
711 if (net_error == OK)
712 UpdateConnectionTypeHistograms(CONNECTION_ANY);
713
714 if (net_error != OK) {
715 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
716 return;
717 }
718
719 struct sockaddr_storage source_address;
720 socklen_t addrlen = sizeof(source_address);
721 int rv = getsockname(
722 socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
723 if (rv != 0) {
724 LOG(ERROR) << "getsockname() [rv: " << rv
725 << "] error: " << WSAGetLastError();
726 NOTREACHED();
727 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
728 return;
729 }
730
731 const std::string source_address_str =
732 NetAddressToStringWithPort(
733 reinterpret_cast<const struct sockaddr*>(&source_address),
734 sizeof(source_address));
735 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT,
736 make_scoped_refptr(new NetLogStringParameter(
737 "source address",
738 source_address_str)));
739 }
740
DoReadCallback(int rv)741 void TCPClientSocketWin::DoReadCallback(int rv) {
742 DCHECK_NE(rv, ERR_IO_PENDING);
743 DCHECK(read_callback_);
744
745 // since Run may result in Read being called, clear read_callback_ up front.
746 CompletionCallback* c = read_callback_;
747 read_callback_ = NULL;
748 c->Run(rv);
749 }
750
DoWriteCallback(int rv)751 void TCPClientSocketWin::DoWriteCallback(int rv) {
752 DCHECK_NE(rv, ERR_IO_PENDING);
753 DCHECK(write_callback_);
754
755 // since Run may result in Write being called, clear write_callback_ up front.
756 CompletionCallback* c = write_callback_;
757 write_callback_ = NULL;
758 c->Run(rv);
759 }
760
DidCompleteConnect()761 void TCPClientSocketWin::DidCompleteConnect() {
762 DCHECK_EQ(next_connect_state_, CONNECT_STATE_CONNECT_COMPLETE);
763 int result;
764
765 WSANETWORKEVENTS events;
766 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
767 &events);
768 int os_error = 0;
769 if (rv == SOCKET_ERROR) {
770 NOTREACHED();
771 os_error = WSAGetLastError();
772 result = MapSystemError(os_error);
773 } else if (events.lNetworkEvents & FD_CONNECT) {
774 os_error = events.iErrorCode[FD_CONNECT_BIT];
775 result = MapConnectError(os_error);
776 } else {
777 NOTREACHED();
778 result = ERR_UNEXPECTED;
779 }
780
781 connect_os_error_ = os_error;
782 rv = DoConnectLoop(result);
783 if (rv != ERR_IO_PENDING) {
784 LogConnectCompletion(rv);
785 DoReadCallback(rv);
786 }
787 }
788
DidCompleteRead()789 void TCPClientSocketWin::DidCompleteRead() {
790 DCHECK(waiting_read_);
791 DWORD num_bytes, flags;
792 BOOL ok = WSAGetOverlappedResult(socket_, &core_->read_overlapped_,
793 &num_bytes, FALSE, &flags);
794 WSAResetEvent(core_->read_overlapped_.hEvent);
795 waiting_read_ = false;
796 core_->read_iobuffer_ = NULL;
797 if (ok) {
798 base::StatsCounter read_bytes("tcp.read_bytes");
799 read_bytes.Add(num_bytes);
800 if (num_bytes > 0)
801 use_history_.set_was_used_to_convey_data();
802 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num_bytes,
803 core_->read_buffer_.buf);
804 }
805 DoReadCallback(ok ? num_bytes : MapSystemError(WSAGetLastError()));
806 }
807
DidCompleteWrite()808 void TCPClientSocketWin::DidCompleteWrite() {
809 DCHECK(waiting_write_);
810
811 DWORD num_bytes, flags;
812 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
813 &num_bytes, FALSE, &flags);
814 WSAResetEvent(core_->write_overlapped_.hEvent);
815 waiting_write_ = false;
816 int rv;
817 if (!ok) {
818 rv = MapSystemError(WSAGetLastError());
819 } else {
820 rv = static_cast<int>(num_bytes);
821 if (rv > core_->write_buffer_length_ || rv < 0) {
822 // It seems that some winsock interceptors report that more was written
823 // than was available. Treat this as an error. http://crbug.com/27870
824 LOG(ERROR) << "Detected broken LSP: Asked to write "
825 << core_->write_buffer_length_ << " bytes, but " << rv
826 << " bytes reported.";
827 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
828 } else {
829 base::StatsCounter write_bytes("tcp.write_bytes");
830 write_bytes.Add(num_bytes);
831 if (num_bytes > 0)
832 use_history_.set_was_used_to_convey_data();
833 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
834 core_->write_buffer_.buf);
835 }
836 }
837 core_->write_iobuffer_ = NULL;
838 DoWriteCallback(rv);
839 }
840
841 } // namespace net
842