• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * libjingle
3  * Copyright 2004--2005, Google Inc.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *  1. Redistributions of source code must retain the above copyright notice,
9  *     this list of conditions and the following disclaimer.
10  *  2. Redistributions in binary form must reproduce the above copyright notice,
11  *     this list of conditions and the following disclaimer in the documentation
12  *     and/or other materials provided with the distribution.
13  *  3. The name of the author may not be used to endorse or promote products
14  *     derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "talk/p2p/base/pseudotcp.h"
29 
30 #include <cstdio>
31 #include <cstdlib>
32 
33 #include "talk/base/basictypes.h"
34 #include "talk/base/byteorder.h"
35 #include "talk/base/common.h"
36 #include "talk/base/logging.h"
37 #include "talk/base/socket.h"
38 #include "talk/base/stringutils.h"
39 #include "talk/base/time.h"
40 
// Compile-time verbosity of the packet-level diagnostics in this file.
// _DEBUGMSG selects the active level; the #if blocks below compare it against
// _DBG_NORMAL (retransmit / recovery events) and _DBG_VERBOSE (per-packet
// header dumps and RTT estimates).  Leave at _DBG_NONE outside of detailed
// protocol analysis.
#define _DBG_NONE     0
#define _DBG_NORMAL   1
#define _DBG_VERBOSE  2
#define _DEBUGMSG _DBG_NONE
46 
47 namespace cricket {
48 
49 //////////////////////////////////////////////////////////////////////
50 // Network Constants
51 //////////////////////////////////////////////////////////////////////
52 
53 // Standard MTUs
54 const uint16 PACKET_MAXIMUMS[] = {
55   65535,    // Theoretical maximum, Hyperchannel
56   32000,    // Nothing
57   17914,    // 16Mb IBM Token Ring
58   8166,   // IEEE 802.4
59   //4464,   // IEEE 802.5 (4Mb max)
60   4352,   // FDDI
61   //2048,   // Wideband Network
62   2002,   // IEEE 802.5 (4Mb recommended)
63   //1536,   // Expermental Ethernet Networks
64   //1500,   // Ethernet, Point-to-Point (default)
65   1492,   // IEEE 802.3
66   1006,   // SLIP, ARPANET
67   //576,    // X.25 Networks
68   //544,    // DEC IP Portal
69   //512,    // NETBIOS
70   508,    // IEEE 802/Source-Rt Bridge, ARCNET
71   296,    // Point-to-Point (low delay)
72   //68,     // Official minimum
73   0,      // End of list marker
74 };
75 
76 const uint32 MAX_PACKET = 65535;
77 // Note: we removed lowest level because packet overhead was larger!
78 const uint32 MIN_PACKET = 296;
79 
80 const uint32 IP_HEADER_SIZE = 20; // (+ up to 40 bytes of options?)
81 const uint32 ICMP_HEADER_SIZE = 8;
82 const uint32 UDP_HEADER_SIZE = 8;
83 // TODO: Make JINGLE_HEADER_SIZE transparent to this code?
84 const uint32 JINGLE_HEADER_SIZE = 64; // when relay framing is in use
85 
86 //////////////////////////////////////////////////////////////////////
87 // Global Constants and Functions
88 //////////////////////////////////////////////////////////////////////
89 //
90 //    0                   1                   2                   3
91 //    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
92 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
93 //  0 |                      Conversation Number                      |
94 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
95 //  4 |                        Sequence Number                        |
96 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
97 //  8 |                     Acknowledgment Number                     |
98 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
99 //    |               |   |U|A|P|R|S|F|                               |
100 // 12 |    Control    |   |R|C|S|S|Y|I|            Window             |
101 //    |               |   |G|K|H|T|N|N|                               |
102 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
103 // 16 |                       Timestamp sending                       |
104 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
105 // 20 |                      Timestamp receiving                      |
106 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
107 // 24 |                             data                              |
108 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
109 //
110 //////////////////////////////////////////////////////////////////////
111 
112 #define PSEUDO_KEEPALIVE 0
113 
114 const uint32 MAX_SEQ = 0xFFFFFFFF;
115 const uint32 HEADER_SIZE = 24;
116 const uint32 PACKET_OVERHEAD = HEADER_SIZE + UDP_HEADER_SIZE + IP_HEADER_SIZE + JINGLE_HEADER_SIZE;
117 
118 const uint32 MIN_RTO   =   250; // 250 ms (RFC1122, Sec 4.2.3.1 "fractions of a second")
119 const uint32 DEF_RTO   =  3000; // 3 seconds (RFC1122, Sec 4.2.3.1)
120 const uint32 MAX_RTO   = 60000; // 60 seconds
121 const uint32 DEF_ACK_DELAY = 100; // 100 milliseconds
122 
123 const uint8 FLAG_CTL = 0x02;
124 const uint8 FLAG_RST = 0x04;
125 
126 const uint8 CTL_CONNECT = 0;
127 //const uint8 CTL_REDIRECT = 1;
128 const uint8 CTL_EXTRA = 255;
129 
130 /*
131 const uint8 FLAG_FIN = 0x01;
132 const uint8 FLAG_SYN = 0x02;
133 const uint8 FLAG_ACK = 0x10;
134 */
135 
136 const uint32 CTRL_BOUND = 0x80000000;
137 
138 const long DEFAULT_TIMEOUT = 4000; // If there are no pending clocks, wake up every 4 seconds
139 const long CLOSED_TIMEOUT = 60 * 1000; // If the connection is closed, once per minute
140 
141 #if PSEUDO_KEEPALIVE
142 // !?! Rethink these times
143 const uint32 IDLE_PING = 20 * 1000; // 20 seconds (note: WinXP SP2 firewall udp timeout is 90 seconds)
144 const uint32 IDLE_TIMEOUT = 90 * 1000; // 90 seconds;
145 #endif // PSEUDO_KEEPALIVE
146 
147 //////////////////////////////////////////////////////////////////////
148 // Helper Functions
149 //////////////////////////////////////////////////////////////////////
150 
long_to_bytes(uint32 val,void * buf)151 inline void long_to_bytes(uint32 val, void* buf) {
152   *static_cast<uint32*>(buf) = talk_base::HostToNetwork32(val);
153 }
154 
short_to_bytes(uint16 val,void * buf)155 inline void short_to_bytes(uint16 val, void* buf) {
156   *static_cast<uint16*>(buf) = talk_base::HostToNetwork16(val);
157 }
158 
bytes_to_long(const void * buf)159 inline uint32 bytes_to_long(const void* buf) {
160   return talk_base::NetworkToHost32(*static_cast<const uint32*>(buf));
161 }
162 
bytes_to_short(const void * buf)163 inline uint16 bytes_to_short(const void* buf) {
164   return talk_base::NetworkToHost16(*static_cast<const uint16*>(buf));
165 }
166 
bound(uint32 lower,uint32 middle,uint32 upper)167 uint32 bound(uint32 lower, uint32 middle, uint32 upper) {
168   return talk_base::_min(talk_base::_max(lower, middle), upper);
169 }
170 
171 //////////////////////////////////////////////////////////////////////
172 // Debugging Statistics
173 //////////////////////////////////////////////////////////////////////
174 
#if 0  // Not used yet

// Packet counters kept for debugging.
enum Stat {
  S_SENT_PACKET,   // All packet sends
  S_RESENT_PACKET, // All packet sends that are retransmits
  S_RECV_PACKET,   // All packet receives
  S_RECV_NEW,      // All packet receives that are too new
  S_RECV_OLD,      // All packet receives that are too old
  S_NUM_STATS
};

// Short label for each Stat, indexed by the enum value.  There must be
// exactly S_NUM_STATS entries; a missing comma silently merges two adjacent
// literals and leaves the last entry null.
const char* const STAT_NAMES[S_NUM_STATS] = {
  "snt",
  "snt-r",
  "rcv",
  "rcv-n",
  "rcv-o"
};

int g_stats[S_NUM_STATS];

// Bumps the counter for |s|.
inline void Incr(Stat s) { ++g_stats[s]; }

// Logs all counters as a comma-separated "name:value" list and resets them.
void ReportStats() {
  char buffer[256];
  buffer[0] = '\0';
  size_t len = 0;
  for (int i = 0; i < S_NUM_STATS; ++i) {
    // Append after what has been written so far instead of overwriting it.
    len += talk_base::sprintfn(buffer + len, ARRAY_SIZE(buffer) - len,
                               "%s%s:%d",
                               (i == 0) ? "" : ",", STAT_NAMES[i], g_stats[i]);
    g_stats[i] = 0;
  }
  LOG(LS_INFO) << "Stats[" << buffer << "]";
}

#endif
208 
209 //////////////////////////////////////////////////////////////////////
210 // PseudoTcp
211 //////////////////////////////////////////////////////////////////////
212 
Now()213 uint32 PseudoTcp::Now() {
214 #if 0  // Use this to synchronize timers with logging timestamps (easier debug)
215   return talk_base::TimeSince(StartTime());
216 #else
217   return talk_base::Time();
218 #endif
219 }
220 
PseudoTcp(IPseudoTcpNotify * notify,uint32 conv)221 PseudoTcp::PseudoTcp(IPseudoTcpNotify* notify, uint32 conv)
222     : m_notify(notify), m_shutdown(SD_NONE), m_error(0) {
223 
224   // Sanity check on buffer sizes (needed for OnTcpWriteable notification logic)
225   ASSERT(sizeof(m_rbuf) + MIN_PACKET < sizeof(m_sbuf));
226 
227   uint32 now = Now();
228 
229   m_state = TCP_LISTEN;
230   m_conv = conv;
231   m_rcv_wnd = sizeof(m_rbuf);
232   m_snd_nxt = m_slen = 0;
233   m_snd_wnd = 1;
234   m_snd_una = m_rcv_nxt = m_rlen = 0;
235   m_bReadEnable = true;
236   m_bWriteEnable = false;
237   m_t_ack = 0;
238 
239   m_msslevel = 0;
240   m_largest = 0;
241   ASSERT(MIN_PACKET > PACKET_OVERHEAD);
242   m_mss = MIN_PACKET - PACKET_OVERHEAD;
243   m_mtu_advise = MAX_PACKET;
244 
245   m_rto_base = 0;
246 
247   m_cwnd = 2 * m_mss;
248   m_ssthresh = sizeof(m_rbuf);
249   m_lastrecv = m_lastsend = m_lasttraffic = now;
250   m_bOutgoing = false;
251 
252   m_dup_acks = 0;
253   m_recover = 0;
254 
255   m_ts_recent = m_ts_lastack = 0;
256 
257   m_rx_rto = DEF_RTO;
258   m_rx_srtt = m_rx_rttvar = 0;
259 
260   m_use_nagling = true;
261   m_ack_delay = DEF_ACK_DELAY;
262 }
263 
~PseudoTcp()264 PseudoTcp::~PseudoTcp() {
265 }
266 
Connect()267 int PseudoTcp::Connect() {
268   if (m_state != TCP_LISTEN) {
269     m_error = EINVAL;
270     return -1;
271   }
272 
273   m_state = TCP_SYN_SENT;
274   LOG(LS_INFO) << "State: TCP_SYN_SENT";
275 
276   char buffer[1];
277   buffer[0] = CTL_CONNECT;
278   queue(buffer, 1, true);
279   attemptSend();
280 
281   return 0;
282 }
283 
NotifyMTU(uint16 mtu)284 void PseudoTcp::NotifyMTU(uint16 mtu) {
285   m_mtu_advise = mtu;
286   if (m_state == TCP_ESTABLISHED) {
287     adjustMTU();
288   }
289 }
290 
NotifyClock(uint32 now)291 void PseudoTcp::NotifyClock(uint32 now) {
292   if (m_state == TCP_CLOSED)
293     return;
294 
295     // Check if it's time to retransmit a segment
296   if (m_rto_base && (talk_base::TimeDiff(m_rto_base + m_rx_rto, now) <= 0)) {
297     if (m_slist.empty()) {
298       ASSERT(false);
299     } else {
300       // Note: (m_slist.front().xmit == 0)) {
301       // retransmit segments
302 #if _DEBUGMSG >= _DBG_NORMAL
303       LOG(LS_INFO) << "timeout retransmit (rto: " << m_rx_rto
304                    << ") (rto_base: " << m_rto_base
305                    << ") (now: " << now
306                    << ") (dup_acks: " << static_cast<unsigned>(m_dup_acks)
307                    << ")";
308 #endif // _DEBUGMSG
309       if (!transmit(m_slist.begin(), now)) {
310         closedown(ECONNABORTED);
311         return;
312       }
313 
314       uint32 nInFlight = m_snd_nxt - m_snd_una;
315       m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss);
316       //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << "  nInFlight: " << nInFlight << "  m_mss: " << m_mss;
317       m_cwnd = m_mss;
318 
319       // Back off retransmit timer.  Note: the limit is lower when connecting.
320       uint32 rto_limit = (m_state < TCP_ESTABLISHED) ? DEF_RTO : MAX_RTO;
321       m_rx_rto = talk_base::_min(rto_limit, m_rx_rto * 2);
322       m_rto_base = now;
323     }
324   }
325 
326   // Check if it's time to probe closed windows
327   if ((m_snd_wnd == 0)
328         && (talk_base::TimeDiff(m_lastsend + m_rx_rto, now) <= 0)) {
329     if (talk_base::TimeDiff(now, m_lastrecv) >= 15000) {
330       closedown(ECONNABORTED);
331       return;
332     }
333 
334     // probe the window
335     packet(m_snd_nxt - 1, 0, 0, 0);
336     m_lastsend = now;
337 
338     // back off retransmit timer
339     m_rx_rto = talk_base::_min(MAX_RTO, m_rx_rto * 2);
340   }
341 
342   // Check if it's time to send delayed acks
343   if (m_t_ack && (talk_base::TimeDiff(m_t_ack + m_ack_delay, now) <= 0)) {
344     packet(m_snd_nxt, 0, 0, 0);
345   }
346 
347 #if PSEUDO_KEEPALIVE
348   // Check for idle timeout
349   if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lastrecv + IDLE_TIMEOUT, now) <= 0)) {
350     closedown(ECONNABORTED);
351     return;
352   }
353 
354   // Check for ping timeout (to keep udp mapping open)
355   if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now) <= 0)) {
356     packet(m_snd_nxt, 0, 0, 0);
357   }
358 #endif // PSEUDO_KEEPALIVE
359 }
360 
NotifyPacket(const char * buffer,size_t len)361 bool PseudoTcp::NotifyPacket(const char* buffer, size_t len) {
362   if (len > MAX_PACKET) {
363     LOG_F(WARNING) << "packet too large";
364     return false;
365   }
366   return parse(reinterpret_cast<const uint8 *>(buffer), uint32(len));
367 }
368 
GetNextClock(uint32 now,long & timeout)369 bool PseudoTcp::GetNextClock(uint32 now, long& timeout) {
370   return clock_check(now, timeout);
371 }
372 
GetOption(Option opt,int * value)373 void PseudoTcp::GetOption(Option opt, int* value) {
374   if (opt == OPT_NODELAY) {
375     *value = m_use_nagling ? 0 : 1;
376   } else if (opt == OPT_ACKDELAY) {
377     *value = m_ack_delay;
378   } else {
379     ASSERT(false);
380   }
381 }
382 
SetOption(Option opt,int value)383 void PseudoTcp::SetOption(Option opt, int value) {
384   if (opt == OPT_NODELAY) {
385     m_use_nagling = value == 0;
386   } else if (opt == OPT_ACKDELAY) {
387     m_ack_delay = value;
388   } else {
389     ASSERT(false);
390   }
391 }
392 
393 //
394 // IPStream Implementation
395 //
396 
Recv(char * buffer,size_t len)397 int PseudoTcp::Recv(char* buffer, size_t len) {
398   if (m_state != TCP_ESTABLISHED) {
399     m_error = ENOTCONN;
400     return SOCKET_ERROR;
401   }
402 
403   if (m_rlen == 0) {
404     m_bReadEnable = true;
405     m_error = EWOULDBLOCK;
406     return SOCKET_ERROR;
407   }
408 
409   uint32 read = talk_base::_min(uint32(len), m_rlen);
410   memcpy(buffer, m_rbuf, read);
411   m_rlen -= read;
412 
413   // !?! until we create a circular buffer, we need to move all of the rest of the buffer up!
414   memmove(m_rbuf, m_rbuf + read, sizeof(m_rbuf) - read/*m_rlen*/);
415 
416   if ((sizeof(m_rbuf) - m_rlen - m_rcv_wnd)
417       >= talk_base::_min<uint32>(sizeof(m_rbuf) / 2, m_mss)) {
418     bool bWasClosed = (m_rcv_wnd == 0); // !?! Not sure about this was closed business
419 
420     m_rcv_wnd = sizeof(m_rbuf) - m_rlen;
421 
422     if (bWasClosed) {
423       attemptSend(sfImmediateAck);
424     }
425   }
426 
427   return read;
428 }
429 
Send(const char * buffer,size_t len)430 int PseudoTcp::Send(const char* buffer, size_t len) {
431   if (m_state != TCP_ESTABLISHED) {
432     m_error = ENOTCONN;
433     return SOCKET_ERROR;
434   }
435 
436   if (m_slen == sizeof(m_sbuf)) {
437     m_bWriteEnable = true;
438     m_error = EWOULDBLOCK;
439     return SOCKET_ERROR;
440   }
441 
442   int written = queue(buffer, uint32(len), false);
443   attemptSend();
444   return written;
445 }
446 
Close(bool force)447 void PseudoTcp::Close(bool force) {
448   LOG_F(LS_VERBOSE) << "(" << (force ? "true" : "false") << ")";
449   m_shutdown = force ? SD_FORCEFUL : SD_GRACEFUL;
450 }
451 
GetError()452 int PseudoTcp::GetError() {
453   return m_error;
454 }
455 
456 //
457 // Internal Implementation
458 //
459 
queue(const char * data,uint32 len,bool bCtrl)460 uint32 PseudoTcp::queue(const char* data, uint32 len, bool bCtrl) {
461   if (len > sizeof(m_sbuf) - m_slen) {
462     ASSERT(!bCtrl);
463     len = sizeof(m_sbuf) - m_slen;
464   }
465 
466   // We can concatenate data if the last segment is the same type
467   // (control v. regular data), and has not been transmitted yet
468   if (!m_slist.empty() && (m_slist.back().bCtrl == bCtrl) && (m_slist.back().xmit == 0)) {
469     m_slist.back().len += len;
470   } else {
471     SSegment sseg(m_snd_una + m_slen, len, bCtrl);
472     m_slist.push_back(sseg);
473   }
474 
475   memcpy(m_sbuf + m_slen, data, len);
476   m_slen += len;
477   //LOG(LS_INFO) << "PseudoTcp::queue - m_slen = " << m_slen;
478   return len;
479 }
480 
packet(uint32 seq,uint8 flags,const char * data,uint32 len)481 IPseudoTcpNotify::WriteResult PseudoTcp::packet(uint32 seq, uint8 flags,
482                                                 const char* data, uint32 len) {
483   ASSERT(HEADER_SIZE + len <= MAX_PACKET);
484 
485   uint32 now = Now();
486 
487   uint8 buffer[MAX_PACKET];
488   long_to_bytes(m_conv, buffer);
489   long_to_bytes(seq, buffer + 4);
490   long_to_bytes(m_rcv_nxt, buffer + 8);
491   buffer[12] = 0;
492   buffer[13] = flags;
493   short_to_bytes(uint16(m_rcv_wnd), buffer + 14);
494 
495   // Timestamp computations
496   long_to_bytes(now, buffer + 16);
497   long_to_bytes(m_ts_recent, buffer + 20);
498   m_ts_lastack = m_rcv_nxt;
499 
500   memcpy(buffer + HEADER_SIZE, data, len);
501 
502 #if _DEBUGMSG >= _DBG_VERBOSE
503   LOG(LS_INFO) << "<-- <CONV=" << m_conv
504                << "><FLG=" << static_cast<unsigned>(flags)
505                << "><SEQ=" << seq << ":" << seq + len
506                << "><ACK=" << m_rcv_nxt
507                << "><WND=" << m_rcv_wnd
508                << "><TS="  << (now % 10000)
509                << "><TSR=" << (m_ts_recent % 10000)
510                << "><LEN=" << len << ">";
511 #endif // _DEBUGMSG
512 
513   IPseudoTcpNotify::WriteResult wres = m_notify->TcpWritePacket(this, reinterpret_cast<char *>(buffer), len + HEADER_SIZE);
514   // Note: When data is NULL, this is an ACK packet.  We don't read the return value for those,
515   // and thus we won't retry.  So go ahead and treat the packet as a success (basically simulate
516   // as if it were dropped), which will prevent our timers from being messed up.
517   if ((wres != IPseudoTcpNotify::WR_SUCCESS) && (NULL != data))
518     return wres;
519 
520   m_t_ack = 0;
521   if (len > 0) {
522     m_lastsend = now;
523   }
524   m_lasttraffic = now;
525   m_bOutgoing = true;
526 
527   return IPseudoTcpNotify::WR_SUCCESS;
528 }
529 
parse(const uint8 * buffer,uint32 size)530 bool PseudoTcp::parse(const uint8* buffer, uint32 size) {
531   if (size < 12)
532     return false;
533 
534   Segment seg;
535   seg.conv = bytes_to_long(buffer);
536   seg.seq = bytes_to_long(buffer + 4);
537   seg.ack = bytes_to_long(buffer + 8);
538   seg.flags = buffer[13];
539   seg.wnd = bytes_to_short(buffer + 14);
540 
541   seg.tsval = bytes_to_long(buffer + 16);
542   seg.tsecr = bytes_to_long(buffer + 20);
543 
544   seg.data = reinterpret_cast<const char *>(buffer) + HEADER_SIZE;
545   seg.len = size - HEADER_SIZE;
546 
547 #if _DEBUGMSG >= _DBG_VERBOSE
548   LOG(LS_INFO) << "--> <CONV=" << seg.conv
549                << "><FLG=" << static_cast<unsigned>(seg.flags)
550                << "><SEQ=" << seg.seq << ":" << seg.seq + seg.len
551                << "><ACK=" << seg.ack
552                << "><WND=" << seg.wnd
553                << "><TS="  << (seg.tsval % 10000)
554                << "><TSR=" << (seg.tsecr % 10000)
555                << "><LEN=" << seg.len << ">";
556 #endif // _DEBUGMSG
557 
558   return process(seg);
559 }
560 
clock_check(uint32 now,long & nTimeout)561 bool PseudoTcp::clock_check(uint32 now, long& nTimeout) {
562   if (m_shutdown == SD_FORCEFUL)
563     return false;
564 
565   if ((m_shutdown == SD_GRACEFUL)
566       && ((m_state != TCP_ESTABLISHED)
567           || ((m_slen == 0) && (m_t_ack == 0)))) {
568     return false;
569   }
570 
571   if (m_state == TCP_CLOSED) {
572     nTimeout = CLOSED_TIMEOUT;
573     return true;
574   }
575 
576   nTimeout = DEFAULT_TIMEOUT;
577 
578   if (m_t_ack) {
579     nTimeout = talk_base::_min<int32>(nTimeout,
580       talk_base::TimeDiff(m_t_ack + m_ack_delay, now));
581   }
582   if (m_rto_base) {
583     nTimeout = talk_base::_min<int32>(nTimeout,
584       talk_base::TimeDiff(m_rto_base + m_rx_rto, now));
585   }
586   if (m_snd_wnd == 0) {
587     nTimeout = talk_base::_min<int32>(nTimeout, talk_base::TimeDiff(m_lastsend + m_rx_rto, now));
588   }
589 #if PSEUDO_KEEPALIVE
590   if (m_state == TCP_ESTABLISHED) {
591     nTimeout = talk_base::_min<int32>(nTimeout,
592       talk_base::TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now));
593   }
594 #endif // PSEUDO_KEEPALIVE
595   return true;
596 }
597 
process(Segment & seg)598 bool PseudoTcp::process(Segment& seg) {
599   // If this is the wrong conversation, send a reset!?! (with the correct conversation?)
600   if (seg.conv != m_conv) {
601     //if ((seg.flags & FLAG_RST) == 0) {
602     //  packet(tcb, seg.ack, 0, FLAG_RST, 0, 0);
603     //}
604     LOG_F(LS_ERROR) << "wrong conversation";
605     return false;
606   }
607 
608   uint32 now = Now();
609   m_lasttraffic = m_lastrecv = now;
610   m_bOutgoing = false;
611 
612   if (m_state == TCP_CLOSED) {
613     // !?! send reset?
614     LOG_F(LS_ERROR) << "closed";
615     return false;
616   }
617 
618   // Check if this is a reset segment
619   if (seg.flags & FLAG_RST) {
620     closedown(ECONNRESET);
621     return false;
622   }
623 
624   // Check for control data
625   bool bConnect = false;
626   if (seg.flags & FLAG_CTL) {
627     if (seg.len == 0) {
628       LOG_F(LS_ERROR) << "Missing control code";
629       return false;
630     } else if (seg.data[0] == CTL_CONNECT) {
631       bConnect = true;
632       if (m_state == TCP_LISTEN) {
633         m_state = TCP_SYN_RECEIVED;
634         LOG(LS_INFO) << "State: TCP_SYN_RECEIVED";
635         //m_notify->associate(addr);
636         char buffer[1];
637         buffer[0] = CTL_CONNECT;
638         queue(buffer, 1, true);
639       } else if (m_state == TCP_SYN_SENT) {
640         m_state = TCP_ESTABLISHED;
641         LOG(LS_INFO) << "State: TCP_ESTABLISHED";
642         adjustMTU();
643         if (m_notify) {
644           m_notify->OnTcpOpen(this);
645         }
646         //notify(evOpen);
647       }
648     } else {
649       LOG_F(LS_WARNING) << "Unknown control code: " << seg.data[0];
650       return false;
651     }
652   }
653 
654   // Update timestamp
655   if ((seg.seq <= m_ts_lastack) && (m_ts_lastack < seg.seq + seg.len)) {
656     m_ts_recent = seg.tsval;
657   }
658 
659   // Check if this is a valuable ack
660   if ((seg.ack > m_snd_una) && (seg.ack <= m_snd_nxt)) {
661     // Calculate round-trip time
662     if (seg.tsecr) {
663       long rtt = talk_base::TimeDiff(now, seg.tsecr);
664       if (rtt >= 0) {
665         if (m_rx_srtt == 0) {
666           m_rx_srtt = rtt;
667           m_rx_rttvar = rtt / 2;
668         } else {
669           m_rx_rttvar = (3 * m_rx_rttvar + abs(long(rtt - m_rx_srtt))) / 4;
670           m_rx_srtt = (7 * m_rx_srtt + rtt) / 8;
671         }
672         m_rx_rto = bound(MIN_RTO, m_rx_srtt +
673             talk_base::_max<uint32>(1, 4 * m_rx_rttvar), MAX_RTO);
674 #if _DEBUGMSG >= _DBG_VERBOSE
675         LOG(LS_INFO) << "rtt: " << rtt
676                      << "  srtt: " << m_rx_srtt
677                      << "  rto: " << m_rx_rto;
678 #endif // _DEBUGMSG
679       } else {
680         ASSERT(false);
681       }
682     }
683 
684     m_snd_wnd = seg.wnd;
685 
686     uint32 nAcked = seg.ack - m_snd_una;
687     m_snd_una = seg.ack;
688 
689     m_rto_base = (m_snd_una == m_snd_nxt) ? 0 : now;
690 
691     m_slen -= nAcked;
692     memmove(m_sbuf, m_sbuf + nAcked, m_slen);
693     //LOG(LS_INFO) << "PseudoTcp::process - m_slen = " << m_slen;
694 
695     for (uint32 nFree = nAcked; nFree > 0; ) {
696       ASSERT(!m_slist.empty());
697       if (nFree < m_slist.front().len) {
698         m_slist.front().len -= nFree;
699         nFree = 0;
700       } else {
701         if (m_slist.front().len > m_largest) {
702           m_largest = m_slist.front().len;
703         }
704         nFree -= m_slist.front().len;
705         m_slist.pop_front();
706       }
707     }
708 
709     if (m_dup_acks >= 3) {
710       if (m_snd_una >= m_recover) { // NewReno
711         uint32 nInFlight = m_snd_nxt - m_snd_una;
712         m_cwnd = talk_base::_min(m_ssthresh, nInFlight + m_mss); // (Fast Retransmit)
713 #if _DEBUGMSG >= _DBG_NORMAL
714         LOG(LS_INFO) << "exit recovery";
715 #endif // _DEBUGMSG
716         m_dup_acks = 0;
717       } else {
718 #if _DEBUGMSG >= _DBG_NORMAL
719         LOG(LS_INFO) << "recovery retransmit";
720 #endif // _DEBUGMSG
721         if (!transmit(m_slist.begin(), now)) {
722           closedown(ECONNABORTED);
723           return false;
724         }
725         m_cwnd += m_mss - talk_base::_min(nAcked, m_cwnd);
726       }
727     } else {
728       m_dup_acks = 0;
729       // Slow start, congestion avoidance
730       if (m_cwnd < m_ssthresh) {
731         m_cwnd += m_mss;
732       } else {
733         m_cwnd += talk_base::_max<uint32>(1, m_mss * m_mss / m_cwnd);
734       }
735     }
736 
737     // !?! A bit hacky
738     if ((m_state == TCP_SYN_RECEIVED) && !bConnect) {
739       m_state = TCP_ESTABLISHED;
740       LOG(LS_INFO) << "State: TCP_ESTABLISHED";
741       adjustMTU();
742       if (m_notify) {
743         m_notify->OnTcpOpen(this);
744       }
745       //notify(evOpen);
746     }
747 
748     // If we make room in the send queue, notify the user
749     // The goal it to make sure we always have at least enough data to fill the
750     // window.  We'd like to notify the app when we are halfway to that point.
751     const uint32 kIdealRefillSize = (sizeof(m_sbuf) + sizeof(m_rbuf)) / 2;
752     if (m_bWriteEnable && (m_slen < kIdealRefillSize)) {
753       m_bWriteEnable = false;
754       if (m_notify) {
755         m_notify->OnTcpWriteable(this);
756       }
757       //notify(evWrite);
758     }
759   } else if (seg.ack == m_snd_una) {
760     // !?! Note, tcp says don't do this... but otherwise how does a closed window become open?
761     m_snd_wnd = seg.wnd;
762 
763     // Check duplicate acks
764     if (seg.len > 0) {
765       // it's a dup ack, but with a data payload, so don't modify m_dup_acks
766     } else if (m_snd_una != m_snd_nxt) {
767       m_dup_acks += 1;
768       if (m_dup_acks == 3) { // (Fast Retransmit)
769 #if _DEBUGMSG >= _DBG_NORMAL
770         LOG(LS_INFO) << "enter recovery";
771         LOG(LS_INFO) << "recovery retransmit";
772 #endif // _DEBUGMSG
773         if (!transmit(m_slist.begin(), now)) {
774           closedown(ECONNABORTED);
775           return false;
776         }
777         m_recover = m_snd_nxt;
778         uint32 nInFlight = m_snd_nxt - m_snd_una;
779         m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss);
780         //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << "  nInFlight: " << nInFlight << "  m_mss: " << m_mss;
781         m_cwnd = m_ssthresh + 3 * m_mss;
782       } else if (m_dup_acks > 3) {
783         m_cwnd += m_mss;
784       }
785     } else {
786       m_dup_acks = 0;
787     }
788   }
789 
790   // Conditions were acks must be sent:
791   // 1) Segment is too old (they missed an ACK) (immediately)
792   // 2) Segment is too new (we missed a segment) (immediately)
793   // 3) Segment has data (so we need to ACK!) (delayed)
794   // ... so the only time we don't need to ACK, is an empty segment that points to rcv_nxt!
795 
796   SendFlags sflags = sfNone;
797   if (seg.seq != m_rcv_nxt) {
798     sflags = sfImmediateAck; // (Fast Recovery)
799   } else if (seg.len != 0) {
800     if (m_ack_delay == 0) {
801       sflags = sfImmediateAck;
802     } else {
803       sflags = sfDelayedAck;
804     }
805   }
806 #if _DEBUGMSG >= _DBG_NORMAL
807   if (sflags == sfImmediateAck) {
808     if (seg.seq > m_rcv_nxt) {
809       LOG_F(LS_INFO) << "too new";
810     } else if (seg.seq + seg.len <= m_rcv_nxt) {
811       LOG_F(LS_INFO) << "too old";
812     }
813   }
814 #endif // _DEBUGMSG
815 
816   // Adjust the incoming segment to fit our receive buffer
817   if (seg.seq < m_rcv_nxt) {
818     uint32 nAdjust = m_rcv_nxt - seg.seq;
819     if (nAdjust < seg.len) {
820       seg.seq += nAdjust;
821       seg.data += nAdjust;
822       seg.len -= nAdjust;
823     } else {
824       seg.len = 0;
825     }
826   }
827   if ((seg.seq + seg.len - m_rcv_nxt) > (sizeof(m_rbuf) - m_rlen)) {
828     uint32 nAdjust = seg.seq + seg.len - m_rcv_nxt - (sizeof(m_rbuf) - m_rlen);
829     if (nAdjust < seg.len) {
830       seg.len -= nAdjust;
831     } else {
832       seg.len = 0;
833     }
834   }
835 
836   bool bIgnoreData = (seg.flags & FLAG_CTL) || (m_shutdown != SD_NONE);
837   bool bNewData = false;
838 
839   if (seg.len > 0) {
840     if (bIgnoreData) {
841       if (seg.seq == m_rcv_nxt) {
842         m_rcv_nxt += seg.len;
843       }
844     } else {
845       uint32 nOffset = seg.seq - m_rcv_nxt;
846       memcpy(m_rbuf + m_rlen + nOffset, seg.data, seg.len);
847       if (seg.seq == m_rcv_nxt) {
848         m_rlen += seg.len;
849         m_rcv_nxt += seg.len;
850         m_rcv_wnd -= seg.len;
851         bNewData = true;
852 
853         RList::iterator it = m_rlist.begin();
854         while ((it != m_rlist.end()) && (it->seq <= m_rcv_nxt)) {
855           if (it->seq + it->len > m_rcv_nxt) {
856             sflags = sfImmediateAck; // (Fast Recovery)
857             uint32 nAdjust = (it->seq + it->len) - m_rcv_nxt;
858 #if _DEBUGMSG >= _DBG_NORMAL
859             LOG(LS_INFO) << "Recovered " << nAdjust << " bytes (" << m_rcv_nxt << " -> " << m_rcv_nxt + nAdjust << ")";
860 #endif // _DEBUGMSG
861             m_rlen += nAdjust;
862             m_rcv_nxt += nAdjust;
863             m_rcv_wnd -= nAdjust;
864           }
865           it = m_rlist.erase(it);
866         }
867       } else {
868 #if _DEBUGMSG >= _DBG_NORMAL
869         LOG(LS_INFO) << "Saving " << seg.len << " bytes (" << seg.seq << " -> " << seg.seq + seg.len << ")";
870 #endif // _DEBUGMSG
871         RSegment rseg;
872         rseg.seq = seg.seq;
873         rseg.len = seg.len;
874         RList::iterator it = m_rlist.begin();
875         while ((it != m_rlist.end()) && (it->seq < rseg.seq)) {
876           ++it;
877         }
878         m_rlist.insert(it, rseg);
879       }
880     }
881   }
882 
883   attemptSend(sflags);
884 
885   // If we have new data, notify the user
886   if (bNewData && m_bReadEnable) {
887     m_bReadEnable = false;
888     if (m_notify) {
889       m_notify->OnTcpReadable(this);
890     }
891     //notify(evRead);
892   }
893 
894   return true;
895 }
896 
transmit(const SList::iterator & seg,uint32 now)897 bool PseudoTcp::transmit(const SList::iterator& seg, uint32 now) {
898   if (seg->xmit >= ((m_state == TCP_ESTABLISHED) ? 15 : 30)) {
899     LOG_F(LS_VERBOSE) << "too many retransmits";
900     return false;
901   }
902 
903   uint32 nTransmit = talk_base::_min(seg->len, m_mss);
904 
905   while (true) {
906     uint32 seq = seg->seq;
907     uint8 flags = (seg->bCtrl ? FLAG_CTL : 0);
908     const char* buffer = m_sbuf + (seg->seq - m_snd_una);
909     IPseudoTcpNotify::WriteResult wres = this->packet(seq, flags, buffer, nTransmit);
910 
911     if (wres == IPseudoTcpNotify::WR_SUCCESS)
912       break;
913 
914     if (wres == IPseudoTcpNotify::WR_FAIL) {
915       LOG_F(LS_VERBOSE) << "packet failed";
916       return false;
917     }
918 
919     ASSERT(wres == IPseudoTcpNotify::WR_TOO_LARGE);
920 
921     while (true) {
922       if (PACKET_MAXIMUMS[m_msslevel + 1] == 0) {
923         LOG_F(LS_VERBOSE) << "MTU too small";
924         return false;
925       }
926       // !?! We need to break up all outstanding and pending packets and then retransmit!?!
927 
928       m_mss = PACKET_MAXIMUMS[++m_msslevel] - PACKET_OVERHEAD;
929       m_cwnd = 2 * m_mss; // I added this... haven't researched actual formula
930       if (m_mss < nTransmit) {
931         nTransmit = m_mss;
932         break;
933       }
934     }
935 #if _DEBUGMSG >= _DBG_NORMAL
936     LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes";
937 #endif // _DEBUGMSG
938   }
939 
940   if (nTransmit < seg->len) {
941     LOG_F(LS_VERBOSE) << "mss reduced to " << m_mss;
942 
943     SSegment subseg(seg->seq + nTransmit, seg->len - nTransmit, seg->bCtrl);
944     //subseg.tstamp = seg->tstamp;
945     subseg.xmit = seg->xmit;
946     seg->len = nTransmit;
947 
948     SList::iterator next = seg;
949     m_slist.insert(++next, subseg);
950   }
951 
952   if (seg->xmit == 0) {
953     m_snd_nxt += seg->len;
954   }
955   seg->xmit += 1;
956   //seg->tstamp = now;
957   if (m_rto_base == 0) {
958     m_rto_base = now;
959   }
960 
961   return true;
962 }
963 
attemptSend(SendFlags sflags)964 void PseudoTcp::attemptSend(SendFlags sflags) {
965   uint32 now = Now();
966 
967   if (talk_base::TimeDiff(now, m_lastsend) > static_cast<long>(m_rx_rto)) {
968     m_cwnd = m_mss;
969   }
970 
971 #if _DEBUGMSG
972   bool bFirst = true;
973   UNUSED(bFirst);
974 #endif // _DEBUGMSG
975 
976   while (true) {
977     uint32 cwnd = m_cwnd;
978     if ((m_dup_acks == 1) || (m_dup_acks == 2)) { // Limited Transmit
979       cwnd += m_dup_acks * m_mss;
980     }
981     uint32 nWindow = talk_base::_min(m_snd_wnd, cwnd);
982     uint32 nInFlight = m_snd_nxt - m_snd_una;
983     uint32 nUseable = (nInFlight < nWindow) ? (nWindow - nInFlight) : 0;
984 
985     uint32 nAvailable = talk_base::_min(m_slen - nInFlight, m_mss);
986 
987     if (nAvailable > nUseable) {
988       if (nUseable * 4 < nWindow) {
989         // RFC 813 - avoid SWS
990         nAvailable = 0;
991       } else {
992         nAvailable = nUseable;
993       }
994     }
995 
996 #if _DEBUGMSG >= _DBG_VERBOSE
997     if (bFirst) {
998       bFirst = false;
999       LOG(LS_INFO) << "[cwnd: " << m_cwnd
1000                    << "  nWindow: " << nWindow
1001                    << "  nInFlight: " << nInFlight
1002                    << "  nAvailable: " << nAvailable
1003                    << "  nQueued: " << m_slen - nInFlight
1004                    << "  nEmpty: " << sizeof(m_sbuf) - m_slen
1005                    << "  ssthresh: " << m_ssthresh << "]";
1006     }
1007 #endif // _DEBUGMSG
1008 
1009     if (nAvailable == 0) {
1010       if (sflags == sfNone)
1011         return;
1012 
1013       // If this is an immediate ack, or the second delayed ack
1014       if ((sflags == sfImmediateAck) || m_t_ack) {
1015         packet(m_snd_nxt, 0, 0, 0);
1016       } else {
1017         m_t_ack = Now();
1018       }
1019       return;
1020     }
1021 
1022     // Nagle's algorithm.
1023     // If there is data already in-flight, and we haven't a full segment of
1024     // data ready to send then hold off until we get more to send, or the
1025     // in-flight data is acknowledged.
1026     if (m_use_nagling && (m_snd_nxt > m_snd_una) && (nAvailable < m_mss))  {
1027       return;
1028     }
1029 
1030     // Find the next segment to transmit
1031     SList::iterator it = m_slist.begin();
1032     while (it->xmit > 0) {
1033       ++it;
1034       ASSERT(it != m_slist.end());
1035     }
1036     SList::iterator seg = it;
1037 
1038     // If the segment is too large, break it into two
1039     if (seg->len > nAvailable) {
1040       SSegment subseg(seg->seq + nAvailable, seg->len - nAvailable, seg->bCtrl);
1041       seg->len = nAvailable;
1042       m_slist.insert(++it, subseg);
1043     }
1044 
1045     if (!transmit(seg, now)) {
1046       LOG_F(LS_VERBOSE) << "transmit failed";
1047       // TODO: consider closing socket
1048       return;
1049     }
1050 
1051     sflags = sfNone;
1052   }
1053 }
1054 
1055 void
closedown(uint32 err)1056 PseudoTcp::closedown(uint32 err) {
1057   m_slen = 0;
1058 
1059   LOG(LS_INFO) << "State: TCP_CLOSED";
1060   m_state = TCP_CLOSED;
1061   if (m_notify) {
1062     m_notify->OnTcpClosed(this, err);
1063   }
1064   //notify(evClose, err);
1065 }
1066 
1067 void
adjustMTU()1068 PseudoTcp::adjustMTU() {
1069   // Determine our current mss level, so that we can adjust appropriately later
1070   for (m_msslevel = 0; PACKET_MAXIMUMS[m_msslevel + 1] > 0; ++m_msslevel) {
1071     if (static_cast<uint16>(PACKET_MAXIMUMS[m_msslevel]) <= m_mtu_advise) {
1072       break;
1073     }
1074   }
1075   m_mss = m_mtu_advise - PACKET_OVERHEAD;
1076   // !?! Should we reset m_largest here?
1077 #if _DEBUGMSG >= _DBG_NORMAL
1078   LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes";
1079 #endif // _DEBUGMSG
1080   // Enforce minimums on ssthresh and cwnd
1081   m_ssthresh = talk_base::_max(m_ssthresh, 2 * m_mss);
1082   m_cwnd = talk_base::_max(m_cwnd, m_mss);
1083 }
1084 
1085 }  // namespace cricket
1086