1 /**
2 * @file
3 * Transmission Control Protocol, incoming traffic
4 *
5 * The input processing functions of the TCP layer.
6 *
7 * These functions are generally called in the order (ip_input() ->)
8 * tcp_input() -> * tcp_process() -> tcp_receive() (-> application).
9 *
10 */
11
12 /*
13 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without modification,
17 * are permitted provided that the following conditions are met:
18 *
19 * 1. Redistributions of source code must retain the above copyright notice,
20 * this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright notice,
22 * this list of conditions and the following disclaimer in the documentation
23 * and/or other materials provided with the distribution.
24 * 3. The name of the author may not be used to endorse or promote products
25 * derived from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
30 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
32 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
35 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
36 * OF SUCH DAMAGE.
37 *
38 * This file is part of the lwIP TCP/IP stack.
39 *
40 * Author: Adam Dunkels <adam@sics.se>
41 *
42 */
43
44 #include "lwip/opt.h"
45
46 #if LWIP_TCP /* don't build if not configured for use in lwipopts.h */
47
48 #include "lwip/priv/tcp_priv.h"
49 #if LWIP_SACK
50 #include "lwip/tcp_sack.h"
51 #endif
52 #include "lwip/def.h"
53 #include "lwip/ip_addr.h"
54 #include "lwip/netif.h"
55 #include "lwip/mem.h"
56 #include "lwip/memp.h"
57 #include "lwip/sys.h"
58 #include "lwip/inet_chksum.h"
59 #include "lwip/stats.h"
60 #include "lwip/ip6.h"
61 #include "lwip/ip6_addr.h"
62 #if LWIP_ND6_TCP_REACHABILITY_HINTS
63 #include "lwip/nd6.h"
64 #endif /* LWIP_ND6_TCP_REACHABILITY_HINTS */
65
66 #include <string.h>
67
68 #ifdef LWIP_HOOK_FILENAME
69 #include LWIP_HOOK_FILENAME
70 #endif
71
72 /* These variables are global to all functions involved in the input
73 processing of TCP segments. They are set by the tcp_input()
74 function. */
75 static struct tcp_seg inseg;
76 static struct tcp_hdr *tcphdr;
77 static u16_t tcphdr_optlen;
78 static u16_t tcphdr_opt1len;
79 static u8_t *tcphdr_opt2;
80
81 static u32_t seqno, ackno;
82 static tcpwnd_size_t recv_acked;
83 static u16_t tcplen;
84 static u8_t flags;
85
86 static u8_t recv_flags;
87 static struct pbuf *recv_data;
88
89 struct tcp_pcb *tcp_input_pcb;
90
91 /* Forward declarations. */
92 static err_t tcp_process(struct tcp_pcb *pcb);
93 static void tcp_receive(struct tcp_pcb *pcb);
94 static void tcp_parseopt(struct tcp_pcb *pcb);
95
96 static void tcp_listen_input(struct tcp_pcb_listen *pcb);
97 static err_t tcp_timewait_input(struct tcp_pcb *pcb);
98
99 static int tcp_input_delayed_close(struct tcp_pcb *pcb);
100
101 #if LWIP_SACK
102 void tcp_update_sack_for_received_ooseq_segs(struct tcp_pcb *pcb);
103
104 void tcp_parseopt_sack_permitted(struct tcp_pcb *pcb);
105
106 void tcp_pcb_reset_sack_seq(struct tcp_pcb *pcb);
107 #endif
108
109 #if LWIP_SACK && TCP_QUEUE_OOSEQ
static void update_ooseq_order_and_cnt(struct tcp_pcb *pcb, u32_t *order)
{
  struct tcp_seg *seg = NULL;
  u32_t lowest = 0xffffffff;

  if (pcb->ooseq == NULL) {
    /* The out-of-sequence queue drained completely: restart numbering. */
    pcb->ooseq_cnt = 0;
  }

  /* Hand out the current counter value as the new segment's arrival order. */
  *order = pcb->ooseq_cnt;

  if ((pcb->ooseq_cnt + 1) > pcb->ooseq_cnt) {
    /* No u32_t wrap-around: simply advance the counter. */
    pcb->ooseq_cnt++;
  } else {
    /* Counter would wrap: rebase every queued segment's order by the
       smallest order currently present, then rebase the counter too. */
    for (seg = pcb->ooseq; seg != NULL; seg = seg->next) {
      if (seg->order < lowest) {
        lowest = seg->order;
      }
    }
    for (seg = pcb->ooseq; seg != NULL; seg = seg->next) {
      seg->order -= lowest;
    }
    pcb->ooseq_cnt -= lowest;
    pcb->ooseq_cnt++;
  }
}
138 #endif
139
140 /**
141 * The initial input processing of TCP. It verifies the TCP header, demultiplexes
142 * the segment between the PCBs and passes it on to tcp_process(), which implements
143 * the TCP finite state machine. This function is called by the IP layer (in
144 * ip_input()).
145 *
146 * @param p received TCP segment to process (p->payload pointing to the TCP header)
147 * @param inp network interface on which this segment was received
148 */
void
tcp_input(struct pbuf *p, struct netif *inp)
{
  struct tcp_pcb *pcb, *prev;
  struct tcp_pcb_listen *lpcb;
#if SO_REUSE
  struct tcp_pcb *lpcb_prev = NULL;
  struct tcp_pcb_listen *lpcb_any = NULL;
#endif /* SO_REUSE */
  u8_t hdrlen_bytes;
  err_t err;

  LWIP_UNUSED_ARG(inp);
  LWIP_ASSERT_CORE_LOCKED();
  LWIP_ASSERT("tcp_input: invalid pbuf", p != NULL);

  PERF_START;

  TCP_STATS_INC(tcp.recv);
  MIB2_STATS_INC(mib2.tcpinsegs);

  tcphdr = (struct tcp_hdr *)p->payload;

#if TCP_INPUT_DEBUG
  tcp_debug_print(tcphdr);
#endif

  /* Check that TCP header fits in payload */
  if (p->len < TCP_HLEN) {
    /* drop short packets */
    LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: short packet (%"U16_F" bytes) discarded\n", p->tot_len));
    TCP_STATS_INC(tcp.lenerr);
    goto dropped;
  }

  /* Don't even process incoming broadcasts/multicasts; also drop segments
     where src==dest port and src==dest address (suspected TCP LAND attack). */
  if (ip_addr_isbroadcast(ip_current_dest_addr(), ip_current_netif()) ||
      ip_addr_ismulticast(ip_current_dest_addr()) ||
      (tcphdr->src == tcphdr->dest && ip_addr_cmp(ip_current_dest_addr(), ip_current_src_addr()))) {
    TCP_STATS_INC(tcp.proterr);
    goto dropped;
  }

#if CHECKSUM_CHECK_TCP
  IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_CHECK_TCP) {
    /* Verify TCP checksum. */
    u16_t chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len,
                                    ip_current_src_addr(), ip_current_dest_addr());
    if (chksum != 0) {
      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum 0x%04"X16_F"\n",
                                    chksum));
      tcp_debug_print(tcphdr);
      TCP_STATS_INC(tcp.chkerr);
      goto dropped;
    }
  }
#endif /* CHECKSUM_CHECK_TCP */

  /* sanity-check header length */
  hdrlen_bytes = TCPH_HDRLEN_BYTES(tcphdr);
  if ((hdrlen_bytes < TCP_HLEN) || (hdrlen_bytes > p->tot_len)) {
    LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: invalid header length (%"U16_F")\n", (u16_t)hdrlen_bytes));
    TCP_STATS_INC(tcp.lenerr);
    goto dropped;
  }

  /* Move the payload pointer in the pbuf so that it points to the
     TCP data instead of the TCP header. The TCP options may be split
     across two pbufs; tcphdr_opt1len/tcphdr_opt2 record where each
     part lives so tcp_parseopt() can read them later. */
  tcphdr_optlen = (u16_t)(hdrlen_bytes - TCP_HLEN);
  tcphdr_opt2 = NULL;
  if (p->len >= hdrlen_bytes) {
    /* all options are in the first pbuf */
    tcphdr_opt1len = tcphdr_optlen;
    pbuf_remove_header(p, hdrlen_bytes); /* cannot fail */
  } else {
    u16_t opt2len;
    /* TCP header fits into first pbuf, options don't - data is in the next pbuf */
    /* there must be a next pbuf, due to hdrlen_bytes sanity check above */
    LWIP_ASSERT("p->next != NULL", p->next != NULL);

    /* advance over the TCP header (cannot fail) */
    pbuf_remove_header(p, TCP_HLEN);

    /* determine how long the first and second parts of the options are */
    tcphdr_opt1len = p->len;
    opt2len = (u16_t)(tcphdr_optlen - tcphdr_opt1len);

    /* options continue in the next pbuf: set p to zero length and hide the
       options in the next pbuf (adjusting p->tot_len) */
    pbuf_remove_header(p, tcphdr_opt1len);

    /* check that the options fit in the second pbuf */
    if (opt2len > p->next->len) {
      /* drop short packets */
      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: options overflow second pbuf (%"U16_F" bytes)\n", p->next->len));
      TCP_STATS_INC(tcp.lenerr);
      goto dropped;
    }

    /* remember the pointer to the second part of the options */
    tcphdr_opt2 = (u8_t *)p->next->payload;

    /* advance p->next to point after the options, and manually
       adjust p->tot_len to keep it consistent with the changed p->next */
    pbuf_remove_header(p->next, opt2len);
    p->tot_len = (u16_t)(p->tot_len - opt2len);

    LWIP_ASSERT("p->len == 0", p->len == 0);
    LWIP_ASSERT("p->tot_len == p->next->tot_len", p->tot_len == p->next->tot_len);
  }

  /* Convert fields in TCP header to host byte order. */
  tcphdr->src = lwip_ntohs(tcphdr->src);
  tcphdr->dest = lwip_ntohs(tcphdr->dest);
  seqno = tcphdr->seqno = lwip_ntohl(tcphdr->seqno);
  ackno = tcphdr->ackno = lwip_ntohl(tcphdr->ackno);
  tcphdr->wnd = lwip_ntohs(tcphdr->wnd);

  flags = TCPH_FLAGS(tcphdr);
  tcplen = p->tot_len;
  /* SYN and FIN each consume one unit of sequence space. */
  if (flags & (TCP_FIN | TCP_SYN)) {
    tcplen++;
    if (tcplen < p->tot_len) {
      /* u16_t overflow, cannot handle this */
      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: length u16_t overflow, cannot handle this\n"));
      TCP_STATS_INC(tcp.lenerr);
      goto dropped;
    }
  }

  /* Demultiplex an incoming segment. First, we check if it is destined
     for an active connection. */
  prev = NULL;

  for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
    LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb->state != CLOSED);
    LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT);
    LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb->state != LISTEN);

    /* check if PCB is bound to specific netif */
    if ((pcb->netif_idx != NETIF_NO_INDEX) &&
        (pcb->netif_idx != netif_get_index(ip_data.current_input_netif))) {
      prev = pcb;
      continue;
    }

    if (pcb->remote_port == tcphdr->src &&
        pcb->local_port == tcphdr->dest &&
        ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) &&
        ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) {
      /* Move this PCB to the front of the list so that subsequent
         lookups will be faster (we exploit locality in TCP segment
         arrivals). */
      LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb->next != pcb);
      if (prev != NULL) {
        prev->next = pcb->next;
        pcb->next = tcp_active_pcbs;
        tcp_active_pcbs = pcb;
      } else {
        TCP_STATS_INC(tcp.cachehit);
      }
      LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb->next != pcb);
      break;
    }
    prev = pcb;
  }

  if (pcb == NULL) {
    /* If it did not go to an active connection, we check the connections
       in the TIME-WAIT state. */
    for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) {
      LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb->state == TIME_WAIT);

      /* check if PCB is bound to specific netif */
      if ((pcb->netif_idx != NETIF_NO_INDEX) &&
          (pcb->netif_idx != netif_get_index(ip_data.current_input_netif))) {
        continue;
      }

      if (pcb->remote_port == tcphdr->src &&
          pcb->local_port == tcphdr->dest &&
          ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) &&
          ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) {
        /* We don't really care enough to move this PCB to the front
           of the list since we are not very likely to receive that
           many segments for connections in TIME-WAIT. */
        LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for TIME_WAITing connection.\n"));
#ifdef LWIP_HOOK_TCP_INPACKET_PCB
        if (LWIP_HOOK_TCP_INPACKET_PCB(pcb, tcphdr, tcphdr_optlen, tcphdr_opt1len,
                                       tcphdr_opt2, p) == ERR_OK)
#endif
        {
          if (tcp_timewait_input(pcb) == ERR_ABRT) {
            /* Peer has sent RST, so we inform the application that the
             * connection is dead before deallocating the PCB. */
            TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_ABRT);
            tcp_pcb_remove(&tcp_tw_pcbs, pcb);
            tcp_free(pcb);
          }
        }
        pbuf_free(p);
        return;
      }
    }

    /* Finally, if we still did not get a match, we check all PCBs that
       are LISTENing for incoming connections. */
    prev = NULL;
    for (lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) {
      /* check if PCB is bound to specific netif */
      if ((lpcb->netif_idx != NETIF_NO_INDEX) &&
          (lpcb->netif_idx != netif_get_index(ip_data.current_input_netif))) {
        prev = (struct tcp_pcb *)lpcb;
        continue;
      }

      if (lpcb->local_port == tcphdr->dest) {
        if (IP_IS_ANY_TYPE_VAL(lpcb->local_ip)) {
          /* found an ANY TYPE (IPv4/IPv6) match */
#if SO_REUSE
          lpcb_any = lpcb;
          lpcb_prev = prev;
#else /* SO_REUSE */
          break;
#endif /* SO_REUSE */
        } else if (IP_ADDR_PCB_VERSION_MATCH_EXACT(lpcb, ip_current_dest_addr())) {
          if (ip_addr_cmp(&lpcb->local_ip, ip_current_dest_addr())) {
            /* found an exact match */
            break;
          } else if (ip_addr_isany(&lpcb->local_ip)) {
            /* found an ANY-match */
#if SO_REUSE
            lpcb_any = lpcb;
            lpcb_prev = prev;
#else /* SO_REUSE */
            break;
#endif /* SO_REUSE */
          }
        }
      }
      prev = (struct tcp_pcb *)lpcb;
    }
#if SO_REUSE
    /* first try specific local IP */
    if (lpcb == NULL) {
      /* only pass to ANY if no specific local IP has been found */
      lpcb = lpcb_any;
      prev = lpcb_prev;
    }
#endif /* SO_REUSE */
    if (lpcb != NULL) {
      /* Move this PCB to the front of the list so that subsequent
         lookups will be faster (we exploit locality in TCP segment
         arrivals). */
      if (prev != NULL) {
        ((struct tcp_pcb_listen *)prev)->next = lpcb->next;
        /* our successor is the remainder of the listening list */
        lpcb->next = tcp_listen_pcbs.listen_pcbs;
        /* put this listening pcb at the head of the listening list */
        tcp_listen_pcbs.listen_pcbs = lpcb;
      } else {
        TCP_STATS_INC(tcp.cachehit);
      }

      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packed for LISTENing connection.\n"));
#ifdef LWIP_HOOK_TCP_INPACKET_PCB
      if (LWIP_HOOK_TCP_INPACKET_PCB((struct tcp_pcb *)lpcb, tcphdr, tcphdr_optlen,
                                     tcphdr_opt1len, tcphdr_opt2, p) == ERR_OK)
#endif
      {
        tcp_listen_input(lpcb);
      }
      pbuf_free(p);
      return;
    }
  }

#if TCP_INPUT_DEBUG
  LWIP_DEBUGF(TCP_INPUT_DEBUG, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
  tcp_debug_print_flags(TCPH_FLAGS(tcphdr));
  LWIP_DEBUGF(TCP_INPUT_DEBUG, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
#endif /* TCP_INPUT_DEBUG */


#ifdef LWIP_HOOK_TCP_INPACKET_PCB
  /* Give the user hook a chance to discard the segment for this pcb. */
  if ((pcb != NULL) && LWIP_HOOK_TCP_INPACKET_PCB(pcb, tcphdr, tcphdr_optlen,
                                                  tcphdr_opt1len, tcphdr_opt2, p) != ERR_OK) {
    pbuf_free(p);
    return;
  }
#endif
  if (pcb != NULL) {
    /* The incoming segment belongs to a connection. */
#if TCP_INPUT_DEBUG
    tcp_debug_print_state(pcb->state);
#endif /* TCP_INPUT_DEBUG */

    /* Set up a tcp_seg structure. */
    inseg.next = NULL;
    inseg.len = p->tot_len;
    inseg.p = p;
    inseg.tcphdr = tcphdr;

    recv_data = NULL;
    recv_flags = 0;
    recv_acked = 0;

    if (flags & TCP_PSH) {
      p->flags |= PBUF_FLAG_PUSH;
    }

    /* If there is data which was previously "refused" by upper layer */
    if (pcb->refused_data != NULL) {
      if ((tcp_process_refused_data(pcb) == ERR_ABRT) ||
          ((pcb->refused_data != NULL) && (tcplen > 0))) {
        /* pcb has been aborted or refused data is still refused and the new
           segment contains data */
        if (pcb->rcv_ann_wnd == 0) {
          /* this is a zero-window probe, we respond to it with current RCV.NXT
             and drop the data segment */
          tcp_send_empty_ack(pcb);
        }
        TCP_STATS_INC(tcp.drop);
        MIB2_STATS_INC(mib2.tcpinerrs);
        goto aborted;
      }
    }
    tcp_input_pcb = pcb;
    err = tcp_process(pcb);
    /* A return value of ERR_ABRT means that tcp_abort() was called
       and that the pcb has been freed. If so, we don't do anything. */
    if (err != ERR_ABRT) {
      if (recv_flags & TF_RESET) {
        /* TF_RESET means that the connection was reset by the other
           end. We then call the error callback to inform the
           application that the connection is dead before we
           deallocate the PCB. */
        TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, err);
        tcp_pcb_remove(&tcp_active_pcbs, pcb);
        tcp_free(pcb);
      } else {
        err = ERR_OK;
        /* If the application has registered a "sent" function to be
           called when new send buffer space is available, we call it
           now. */
        if (recv_acked > 0) {
          u16_t acked16;
#if LWIP_WND_SCALE
          /* recv_acked is u32_t but the sent callback only takes a u16_t,
             so we might have to call it multiple times. */
          u32_t acked = recv_acked;
          while (acked > 0) {
            acked16 = (u16_t)LWIP_MIN(acked, 0xffffu);
            acked -= acked16;
#else
          {
            acked16 = recv_acked;
#endif
            TCP_EVENT_SENT(pcb, (u16_t)acked16, err);
            if (err == ERR_ABRT) {
              goto aborted;
            }
          }
          recv_acked = 0;
        }
        if (tcp_input_delayed_close(pcb)) {
          goto aborted;
        }
        /* With window scaling, recv_data may exceed the 64K a single
           recv callback can take, so split and deliver in a loop. */
#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
        while (recv_data != NULL) {
          struct pbuf *rest = NULL;
          pbuf_split_64k(recv_data, &rest);
#else /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
        if (recv_data != NULL) {
#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */

          LWIP_ASSERT("pcb->refused_data == NULL", pcb->refused_data == NULL);
          if (pcb->flags & TF_RXCLOSED) {
            /* received data although already closed -> abort (send RST) to
               notify the remote host that not all data has been processed */
            (void)pbuf_free(recv_data);
#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
            if (rest != NULL) {
              (void)pbuf_free(rest);
            }
#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
            tcp_abort(pcb);
            goto aborted;
          }

          /* Notify application that data has been received. */
          TCP_EVENT_RECV(pcb, recv_data, ERR_OK, err);
          if (err == ERR_ABRT) {
#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
            if (rest != NULL) {
              (void)pbuf_free(rest);
            }
#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
            goto aborted;
          }

          /* If the upper layer can't receive this data, store it */
          if (err != ERR_OK) {
#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
            if (rest != NULL) {
              pbuf_cat(recv_data, rest);
            }
#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
            pcb->refused_data = recv_data;
            LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: keep incoming packet, because pcb is \"full\"\n"));
#if TCP_QUEUE_OOSEQ && LWIP_WND_SCALE
            break;
          } else {
            /* Upper layer received the data, go on with the rest if > 64K */
            recv_data = rest;
#endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */
          }
        }

        /* If a FIN segment was received, we call the callback
           function with a NULL buffer to indicate EOF. */
        if (recv_flags & TF_GOT_FIN) {
          if (pcb->refused_data != NULL) {
            /* Delay this if we have refused data. */
            pcb->refused_data->flags |= PBUF_FLAG_TCP_FIN;
          } else {
            /* correct rcv_wnd as the application won't call tcp_recved()
               for the FIN's seqno */
            if (pcb->rcv_wnd != TCP_WND_MAX(pcb)) {
              pcb->rcv_wnd++;
            }
            TCP_EVENT_CLOSED(pcb, err);
            if (err == ERR_ABRT) {
              goto aborted;
            }
            /*
             * Scenario: When DATA is already made the recvmbox full and FIN is received (here FIN post has failed)
             * ---- Flag tcp_pcb_flag is marked TCP_PBUF_FLAG_TCP_FIN_RECV_SYSPOST_FAIL which upon
             * tcp_fasttmr() expiry will be retried to post again.
             * Upon user calling recv() will ensure that the refused data will be given to user and
             * subsequent recv() will return 0 to user for FIN
             */
            else if (err != ERR_OK) {
              pcb->tcp_pcb_flag |= TCP_PBUF_FLAG_TCP_FIN_RECV_SYSPOST_FAIL;
            }
          }
        }

        tcp_input_pcb = NULL;
        if (tcp_input_delayed_close(pcb)) {
          goto aborted;
        }
        /* Try to send something out. */
        tcp_output(pcb);
#if TCP_INPUT_DEBUG
#if TCP_DEBUG
        tcp_debug_print_state(pcb->state);
#endif /* TCP_DEBUG */
#endif /* TCP_INPUT_DEBUG */
      }
    }
    /* Jump target if pcb has been aborted in a callback (by calling tcp_abort()).
       Below this line, 'pcb' may not be dereferenced! */
aborted:
    tcp_input_pcb = NULL;
    recv_data = NULL;

    /* give up our reference to inseg.p */
    if (inseg.p != NULL) {
      pbuf_free(inseg.p);
      inseg.p = NULL;
    }
  } else {
    /* If no matching PCB was found, send a TCP RST (reset) to the
       sender. */
    LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_input: no PCB match found, resetting.\n"));
    if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) {
      TCP_STATS_INC(tcp.proterr);
      TCP_STATS_INC(tcp.drop);
      tcp_rst(NULL, ackno, seqno + tcplen, ip_current_dest_addr(),
              ip_current_src_addr(), tcphdr->dest, tcphdr->src);
    }
    pbuf_free(p);
  }

  LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
  PERF_STOP("tcp_input");
  return;
dropped:
  TCP_STATS_INC(tcp.drop);
  MIB2_STATS_INC(mib2.tcpinerrs);
  pbuf_free(p);
}
642
643 /** Called from tcp_input to check for TF_CLOSED flag. This results in closing
644 * and deallocating a pcb at the correct place to ensure noone references it
645 * any more.
646 * @returns 1 if the pcb has been closed and deallocated, 0 otherwise
647 */
648 static int
649 tcp_input_delayed_close(struct tcp_pcb *pcb)
650 {
651 LWIP_ASSERT("tcp_input_delayed_close: invalid pcb", pcb != NULL);
652
653 if (recv_flags & TF_CLOSED) {
654 /* The connection has been closed and we will deallocate the
655 PCB. */
656 if (!(pcb->flags & TF_RXCLOSED)) {
657 /* Connection closed although the application has only shut down the
658 tx side: call the PCB's err callback and indicate the closure to
659 ensure the application doesn't continue using the PCB. */
660 TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_CLSD);
661 }
662 tcp_pcb_remove(&tcp_active_pcbs, pcb);
663 tcp_free(pcb);
664 return 1;
665 }
666 return 0;
667 }
668
669 void tcp_sndbuf_init(struct tcp_pcb *pcb)
670 {
671 u32_t sndqueuemax;
672 #if LWIP_SO_SNDBUF
673 u32_t snd_buf = pcb->snd_buf_static;
674 #else
675 u32_t snd_buf = pcb->snd_buf;
676 #endif
677 tcpwnd_size_t mss = pcb->mss;
678
679 if ((snd_buf >> 1) > ((mss << 1) + 1)) {
680 if ((snd_buf >> 1) < (snd_buf - 1)) {
681 pcb->snd_buf_lowat = snd_buf >> 1;
682 } else {
683 pcb->snd_buf_lowat = snd_buf - 1;
684 }
685 } else {
686 if (((mss << 1) + 1) < (snd_buf - 1)) {
687 pcb->snd_buf_lowat = (mss << 1) + 1;
688 } else {
689 pcb->snd_buf_lowat = snd_buf - 1;
690 }
691 }
692
693 sndqueuemax = ((snd_buf / mss) << 3);
694 if (sndqueuemax > USHRT_MAX) {
695 sndqueuemax = USHRT_MAX;
696 }
697 pcb->snd_queuelen_max = (u16_t)sndqueuemax;
698 pcb->snd_queuelen_lowat = pcb->snd_queuelen_max >> 1;
699 if (pcb->snd_queuelen_lowat < TCP_SND_QUEUELEN_LOWAT_THRES) {
700 pcb->snd_queuelen_lowat = LWIP_MIN(TCP_SND_QUEUELEN_LOWAT_THRES, pcb->snd_queuelen_max);
701 }
702 }
703
704 /**
705 * Called by tcp_input() when a segment arrives for a listening
706 * connection (from tcp_input()).
707 *
708 * @param pcb the tcp_pcb_listen for which a segment arrived
709 *
710 * @note the segment which arrived is saved in global variables, therefore only the pcb
711 * involved is passed as a parameter to this function
712 */
static void
tcp_listen_input(struct tcp_pcb_listen *pcb)
{
  struct tcp_pcb *npcb;
  u32_t iss;
  err_t rc;
  struct netif *netif = NULL;

  LWIP_UNUSED_ARG(netif);
  if (flags & TCP_RST) {
    /* An incoming RST should be ignored. Return. */
    return;
  }

  LWIP_ASSERT("tcp_listen_input: invalid pcb", pcb != NULL);

  /* In the LISTEN state, we check for incoming SYN segments,
     creates a new PCB, and responds with a SYN|ACK. */
  if (flags & TCP_ACK) {
    /* For incoming segments with the ACK flag set, respond with a
       RST. */
    LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
    tcp_rst((struct tcp_pcb *)pcb, ackno, seqno + tcplen, ip_current_dest_addr(),
            ip_current_src_addr(), tcphdr->dest, tcphdr->src);
  } else if (flags & TCP_SYN) {
    LWIP_DEBUGF(TCP_DEBUG, ("TCP connection request %"U16_F" -> %"U16_F".\n", tcphdr->src, tcphdr->dest));
#if TCP_LISTEN_BACKLOG
    /* Silently drop the SYN when the accept backlog is full; the peer
       will retransmit it. */
    if (pcb->accepts_pending >= pcb->backlog) {
      LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: listen backlog exceeded for port %"U16_F"\n", tcphdr->dest));
      return;
    }
#endif /* TCP_LISTEN_BACKLOG */
    npcb = tcp_alloc(pcb->prio);
    /* If a new PCB could not be created (probably due to lack of memory),
       we don't do anything, but rely on the sender will retransmit the
       SYN at a time when we have more memory available. */
    if (npcb == NULL) {
      err_t err;
      LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: could not allocate PCB\n"));
      TCP_STATS_INC(tcp.memerr);
      TCP_EVENT_ACCEPT(pcb, NULL, pcb->callback_arg, ERR_MEM, err);
      LWIP_UNUSED_ARG(err); /* err not useful here */
      return;
    }
#if TCP_LISTEN_BACKLOG
    pcb->accepts_pending++;
    tcp_set_flags(npcb, TF_BACKLOGPEND);
#endif /* TCP_LISTEN_BACKLOG */
    /* Set up the new PCB. */
    ip_addr_copy(npcb->local_ip, *ip_current_dest_addr());
    ip_addr_copy(npcb->remote_ip, *ip_current_src_addr());
    npcb->local_port = pcb->local_port;
    npcb->remote_port = tcphdr->src;
    npcb->state = SYN_RCVD;
    npcb->rcv_nxt = seqno + 1;
    npcb->rcv_ann_right_edge = npcb->rcv_nxt;
    /* Seed all send-side sequence trackers from the initial send sequence. */
    iss = tcp_next_iss(npcb);
    npcb->snd_wl2 = iss;
    npcb->snd_nxt = iss;
    npcb->snd_sml = iss;
    npcb->lastack = iss;
    npcb->snd_lbb = iss;
    npcb->snd_wl1 = seqno - 1;/* initialise to seqno-1 to force window update */

    npcb->rto_end = iss;
    npcb->fast_recovery_point = iss;
#if LWIP_SO_PRIORITY
    /* Inherit the priority from the listening socket */
    npcb->priority = pcb->priority;
#endif /* LWIP_SO_PRIORITY */

#if LWIP_SO_SNDBUF
    /* Inherit the configured send buffer size from the listener. */
    npcb->snd_buf = pcb->snd_buf_static;
    npcb->snd_buf_static = pcb->snd_buf_static;
#endif /* LWIP_SO_SNDBUF */
    npcb->callback_arg = pcb->callback_arg;
#if LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG
    npcb->listener = pcb;
#endif /* LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG */
    /* inherit socket options */
    npcb->so_options = pcb->so_options & SOF_INHERITED;
    npcb->netif_idx = pcb->netif_idx;

#if LWIP_SACK
    /* Initialize SACK loss-recovery state to "nothing outstanding". */
    npcb->sack_seq = NULL;
    npcb->recovery_point = npcb->snd_nxt - 1;
    npcb->pipe = 0;
    npcb->high_sacked = npcb->snd_nxt - 1;
    npcb->high_data = npcb->snd_nxt - 1;
    npcb->high_rxt = npcb->snd_nxt - 1;
    npcb->rescue_rxt = npcb->snd_nxt - 1;
#if LWIP_FACK_THRESHOLD_BASED_FR
    npcb->fack = npcb->snd_nxt - 1;
#endif /* LWIP_FACK_THRESHOLD_BASED_FR */
#if LWIP_TCP_TLP_SUPPORT
    LWIP_TCP_TLP_CLEAR_VARS(npcb);
#endif /* LWIP_TCP_TLP_SUPPORT */
#endif /* LWIP_SACK */
    /* Register the new PCB so that we can begin receiving segments
       for it. */
    TCP_REG_ACTIVE(npcb);

    /* Incoming packet information is saved on npcb, not listening pcb, so for route use new pcb */
    netif = ip_route_pcb(&npcb->remote_ip, (struct ip_pcb*)npcb);
#if DRIVER_STATUS_CHECK
    if (netif != NULL) {
      if (!(netif->flags & NETIF_FLAG_DRIVER_RDY)) {
        npcb->drv_status = DRV_NOT_READY;
      } else {
        npcb->drv_status = DRV_READY;
      }
    } else {
      /* Mark PCB's driver status as active, later it will identify netif, and then update status for while sending
         packet out */
      npcb->drv_status = DRV_READY;
    }
#endif /* DRIVER_STATUS_CHECK */

    /* Parse any options in the SYN. */
    tcp_parseopt(npcb);
    npcb->snd_wnd = tcphdr->wnd;
    npcb->snd_wnd_max = npcb->snd_wnd;
    npcb->ssthresh = npcb->snd_wnd;

#if LWIP_SACK
    tcp_pcb_reset_sack_seq(npcb);
#endif /* LWIP_SACK */

#if TCP_CALCULATE_EFF_SEND_MSS
#if LWIP_TCP_MAXSEG
    npcb->usr_mss = pcb->usr_mss;
    u16_t mss = ((npcb->usr_mss == 0) ? (TCP_MSS) : (npcb->usr_mss));
#else
    u16_t mss = TCP_MSS;
#endif /* LWIP_TCP_MAXSEG */
    /* Clamp send MSS by what the peer advertised (rcv_mss) and the path. */
    npcb->mss = tcp_eff_send_mss_netif(LWIP_MIN(mss, npcb->rcv_mss), netif, &npcb->remote_ip);
#endif /* TCP_CALCULATE_EFF_SEND_MSS */

    tcp_sndbuf_init(npcb);
    MIB2_STATS_INC(mib2.tcppassiveopens);

#if LWIP_TCP_PCB_NUM_EXT_ARGS
    if (tcp_ext_arg_invoke_callbacks_passive_open(pcb, npcb) != ERR_OK) {
      tcp_abandon(npcb, 0);
      return;
    }
#endif

    /* Send a SYN|ACK together with the MSS option. */
    rc = tcp_enqueue_flags(npcb, TCP_SYN | TCP_ACK);
    if (rc != ERR_OK) {
      tcp_abandon(npcb, 0);
      return;
    }
    tcp_output(npcb);
  }
  return;
}
871
872 /**
873 * Called by tcp_input() when a segment arrives for a connection in
874 * TIME_WAIT.
875 *
876 * @param pcb the tcp_pcb for which a segment arrived
877 *
878 * @note the segment which arrived is saved in global variables, therefore only the pcb
879 * involved is passed as a parameter to this function
880 */
881 static err_t
882 tcp_timewait_input(struct tcp_pcb *pcb)
883 {
884 /* RFC 1337: in TIME_WAIT, ignore RST and ACK FINs + any 'acceptable' segments */
885 /* RFC 793 3.9 Event Processing - Segment Arrives:
886 * - first check sequence number - we skip that one in TIME_WAIT (always
887 * acceptable since we only send ACKs)
888 * - second check the RST bit (... return) */
889 if (flags & TCP_RST) {
890 /* TWA only if the seqno exactly matches */
891 if (seqno == pcb->rcv_nxt) {
892 return ERR_ABRT;
893 }
894 }
895 LWIP_ASSERT("tcp_timewait_input: invalid pcb", pcb != NULL);
896
897 /* - fourth, check the SYN bit, */
898 if (flags & TCP_SYN) {
899 /* If SYN received in TW state, send a challenge ack
900 * Right now, lwIP does not support TWA if seq == rcv_nxt,
901 * and move the state to established & fallback if any failure.
902 * Also does not support TAW by determining age of packet via SYN timestamp,
903 * In current behavior, TWA can be achieved by sending ACK, if client is same it can reply back with RST,
904 * and on receiving RST, lwIP will assasinate TW pcb, and abort.
905 */
906 tcp_ack_now(pcb);
907 (void)tcp_output(pcb);
908 return ERR_OK;
909 } else if (flags & TCP_FIN) {
910 /* - eighth, check the FIN bit: Remain in the TIME-WAIT state.
911 Restart the 2 MSL time-wait timeout.*/
912 pcb->tmr = tcp_ticks;
913 }
914
915 if ((tcplen > 0)) {
916 /* Acknowledge data, FIN or out-of-window SYN */
917 tcp_ack_now(pcb);
918 tcp_output(pcb);
919 }
920 return ERR_OK;
921 }
922
/**
 * Implements the TCP state machine. Called by tcp_input. In some
 * states tcp_receive() is called to receive data. The tcp_seg
 * argument will be freed by the caller (tcp_input()) unless the
 * recv_data pointer in the pcb is set.
 *
 * @param pcb the tcp_pcb for which a segment arrived
 *
 * @return ERR_OK in the normal case; ERR_ABRT if the pcb was aborted
 *         (e.g. by a connect/accept callback); ERR_CONNREFUSED or ERR_RST
 *         when an acceptable RST terminated the connection
 *
 * @note the segment which arrived is saved in global variables, therefore only the pcb
 * involved is passed as a parameter to this function
 * (the file-scope variables flags, seqno, ackno, tcplen, tcphdr and inseg
 * describe the incoming segment and are set up by tcp_input() beforehand)
 */
static err_t
tcp_process(struct tcp_pcb *pcb)
{
  struct tcp_seg *rseg;
  u8_t acceptable = 0;
  err_t err;

  err = ERR_OK;

  LWIP_ASSERT("tcp_process: invalid pcb", pcb != NULL);

  /* Process incoming RST segments. */
  if (flags & TCP_RST) {
    /* First, determine if the reset is acceptable. */
    if (pcb->state == SYN_SENT) {
      /* "In the SYN-SENT state (a RST received in response to an initial SYN),
          the RST is acceptable if the ACK field acknowledges the SYN." */
      if (ackno == pcb->snd_nxt) {
        acceptable = 1;
      }
    } else {
      /* "In all states except SYN-SENT, all reset (RST) segments are validated
          by checking their SEQ-fields." */
      if (seqno == pcb->rcv_nxt) {
        acceptable = 1;
      } else if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt,
                                 pcb->rcv_nxt + pcb->rcv_wnd)) {
        /* If the sequence number is inside the window, we send a challenge ACK
           and wait for a re-send with matching sequence number.
           This follows RFC 5961 section 3.2 and addresses CVE-2004-0230
           (RST spoofing attack), which is present in RFC 793 RST handling. */
        LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: RST seq fallin Rx Window, send challenge ACK \n"));
        tcp_ack_now(pcb);
      }
    }

    if (acceptable) {
      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: Connection RESET\n"));
      LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb->state != CLOSED);
      /* Signal tcp_input() that the connection was reset; it will tear the
         pcb down and notify the application. */
      recv_flags |= TF_RESET;
      tcp_clear_flags(pcb, TF_ACK_DELAY);

      if (pcb->state == SYN_SENT) {
        return ERR_CONNREFUSED;
      } else {
        return ERR_RST;
      }
    } else {
      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
                                    seqno, pcb->rcv_nxt));
      LWIP_DEBUGF(TCP_DEBUG, ("tcp_process: unacceptable reset seqno %"U32_F" rcv_nxt %"U32_F"\n",
                              seqno, pcb->rcv_nxt));
      return ERR_OK;
    }
  }

  if ((flags & TCP_SYN) && (pcb->state != SYN_SENT && pcb->state != SYN_RCVD)) {
    /* Cope with new connection attempt after remote end crashed:
       answer an in-sync SYN with a challenge ACK (RFC 5961 section 4). */
    LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_process: SYN recved in sync state, send challenge ACK \n"));
    tcp_ack_now(pcb);
    return ERR_OK;
  }

  if ((pcb->flags & TF_RXCLOSED) == 0) {
    /* Update the PCB (in)activity timer unless rx is closed (see tcp_shutdown) */
    pcb->tmr = tcp_ticks;
  }
  /* Any valid incoming segment restarts keepalive / persist probing. */
  pcb->keep_cnt_sent = 0;
  pcb->persist_probe = 0;

  tcp_parseopt(pcb);

  /* Do different things depending on the TCP state. */
  switch (pcb->state) {
    case SYN_SENT:
      LWIP_DEBUGF(TCP_INPUT_DEBUG, ("SYN-SENT: ackno %"U32_F" pcb->snd_nxt %"U32_F" unacked %s %"U32_F"\n",
                                    ackno, pcb->snd_nxt, pcb->unacked ? "" : " empty:",
                                    pcb->unacked ? lwip_ntohl(pcb->unacked->tcphdr->seqno) : 0));
      /* received SYN ACK with expected sequence number? */
      if ((flags & TCP_ACK) && (flags & TCP_SYN)
          && (ackno == pcb->lastack + 1)) {
        pcb->rcv_nxt = seqno + 1;
        pcb->rcv_ann_right_edge = pcb->rcv_nxt;
        pcb->lastack = ackno;
        /* Adding for WINDOW SCALE */
        /* SYNACK will have unscaled window value */
        pcb->snd_wnd = tcphdr->wnd;
        pcb->snd_wnd_max = pcb->snd_wnd;
        pcb->snd_wl1 = seqno - 1; /* initialise to seqno - 1 to force window update */
        pcb->state = ESTABLISHED;

#if LWIP_SACK
        tcp_pcb_reset_sack_seq(pcb);
#endif

        /* Base code issue fix.

                LwIP                        Peer
                 SYN -->
                            <-- SYN-ACK
                 ACK -->

               DATA(4380)  // 3 segments of 1460 -->

               No data is ACKed

                            <--  1) DUP ACK (using SYNACK's ACK seq number)
                            <--  2) DUP ACK (using SYNACK's ACK seq number)
                            <--  3) DUP ACK (using SYNACK's ACK seq number)

           Ideally Now lwip should start FR, but it was not triggering.
           Because in tcp_receive() function 1) DUP ACK was not
           actually considered DUP ACK (it was changing the advertised
           window clause 3 of the FR rule). This is fixed below.
        */
        /* Adding for WINDOW SCALE */
        /* NOTE(review): this re-assigns snd_wnd with the scale factor applied,
           overriding the unscaled assignment above; RFC 7323 states the window
           field of a SYN/SYN-ACK is never scaled — confirm this is intended
           as part of the dup-ACK fix described above. */
        pcb->snd_wnd = SND_WND_SCALE(pcb, tcphdr->wnd);
        /* keep tabs on the biggest window announced by the remote host to calculate
           the maximum segment size */
        if (pcb->snd_wnd_max < SND_WND_SCALE(pcb, tcphdr->wnd)) {
          pcb->snd_wnd_max = SND_WND_SCALE(pcb, tcphdr->wnd);
        }
        pcb->snd_wl2 = ackno;

#if TCP_CALCULATE_EFF_SEND_MSS
#if LWIP_TCP_MAXSEG
        u16_t mss = ((pcb->usr_mss == 0) ? (TCP_MSS) : (pcb->usr_mss));
#else
        u16_t mss = TCP_MSS;
#endif /* LWIP_TCP_MAXSEG */
        pcb->mss = tcp_eff_send_mss(LWIP_MIN(mss, pcb->rcv_mss), (struct ip_pcb *)pcb, &pcb->remote_ip);
#endif /* TCP_CALCULATE_EFF_SEND_MSS */

        TCP_CALC_SSTHRESH(pcb, pcb->ssthresh, INITIAL_SSTHRESH, pcb->mss);

        LWIP_TCP_CALC_INITIAL_CWND(pcb->mss, pcb->iw);
        pcb->cwnd = pcb->iw;

        LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_process (SENT): cwnd %"TCPWNDSIZE_F
                                     " ssthresh %"TCPWNDSIZE_F"\n",
                                     pcb->cwnd, pcb->ssthresh));
        /* The SYN segment we sent is now acknowledged: drop it from the
           retransmission queue and account for its pbuf. */
        LWIP_ASSERT("pcb->snd_queuelen > 0", (pcb->snd_queuelen > 0));
        --pcb->snd_queuelen;
        LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_process: SYN-SENT --queuelen %"TCPWNDSIZE_F"\n", (tcpwnd_size_t)pcb->snd_queuelen));
        rseg = pcb->unacked;
        if (rseg == NULL) {
          /* might happen if tcp_output fails in tcp_rexmit_rto()
             in which case the segment is on the unsent list */
          rseg = pcb->unsent;
          LWIP_ASSERT("no segment to free", rseg != NULL);
          pcb->unsent = rseg->next;
        } else {
          pcb->unacked = rseg->next;
        }
        tcp_seg_free(rseg);

        /* If there's nothing left to acknowledge, stop the retransmit
           timer, otherwise reset it to start again */
        if (pcb->unacked == NULL) {
          pcb->rtime = -1;
        } else {
          pcb->rtime = 0;
        }

        /*
         * RFC6298 section 5.7
         * If the timer expires awaiting the ACK of a SYN segment and the
         * TCP implementation is using an RTO less than 3 seconds, the RTO
         * MUST be re-initialized to 3 seconds when data transmission
         * begins (i.e., after the three-way handshake completes).
         */
        if (pcb->nrtx) {
          pcb->rto = TCP_RTO_TICKS_AFTER_SYN_RTX;
          pcb->nrtx = 0;
          /* clear RTO flags, else FR/ER/FACK/TLP can not be triggered */
          tcp_clear_flags(pcb, TF_RTO);
#if LWIP_SACK
          if (tcp_is_flag_set(pcb, TF_SACK)) {
            tcp_clear_flags(pcb, TF_IN_SACK_RTO);
          }
#endif /* LWIP_SACK */
        }

        /* Call the user specified function to call when successfully
         * connected. */
        TCP_EVENT_CONNECTED(pcb, ERR_OK, err);
        if (err == ERR_ABRT) {
          return ERR_ABRT;
        }
        tcp_ack_now(pcb);
      }
      /* received ACK? possibly a half-open connection */
      else if (flags & TCP_ACK) {
        /* send a RST to bring the other side in a non-synchronized state. */
        tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(),
                ip_current_src_addr(), tcphdr->dest, tcphdr->src);
        /* Resend SYN immediately (don't wait for rto timeout) to establish
          connection faster, but do not send more SYNs than we otherwise would
          have, or we might get caught in a loop on loopback interfaces. */
        if (pcb->nrtx < TCP_SYNMAXRTX) {
          pcb->rtime = 0;
          tcp_rexmit_rto(pcb);
        }
      }
      break;
    case SYN_RCVD:
      if (flags & TCP_ACK) {
        /* expected ACK number? */
        if (TCP_SEQ_BETWEEN(ackno, pcb->lastack + 1, pcb->snd_nxt)) {
          pcb->state = ESTABLISHED;
          LWIP_DEBUGF(TCP_DEBUG, ("TCP connection established %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
#if LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG
          if (pcb->listener == NULL) {
            /* listen pcb might be closed by now */
            err = ERR_VAL;
          } else
#endif /* LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG */
          {
#if LWIP_CALLBACK_API
            LWIP_ASSERT("pcb->listener->accept != NULL", pcb->listener->accept != NULL);
#endif
            tcp_backlog_accepted(pcb);
            /* Call the accept function. */
            TCP_EVENT_ACCEPT(pcb->listener, pcb, pcb->callback_arg, ERR_OK, err);
          }
          if (err != ERR_OK) {
            /* If the accept function returns with an error, we abort
             * the connection. */
            /* Already aborted? */
            if (err != ERR_ABRT) {
              tcp_abort(pcb);
            }
            return ERR_ABRT;
          }
          /* If there was any data contained within this ACK,
           * we'd better pass it on to the application as well. */
          tcp_receive(pcb);

          /* Prevent ACK for SYN to generate a sent event */
          if (recv_acked != 0) {
            recv_acked--;
          }

          /* As per RFC6928 larger initial window has low side effect, it can override value of RFC 2581,
          and chances of retransmission is only 0.7%--> whether to implement this part is question?

          Below changes are from RFC 5681
          **/
          LWIP_TCP_CALC_INITIAL_CWND(pcb->mss, pcb->iw);
          pcb->cwnd = pcb->iw;

          LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_process (SYN_RCVD): cwnd %"TCPWNDSIZE_F
                                       " ssthresh %"TCPWNDSIZE_F"\n",
                                       pcb->cwnd, pcb->ssthresh));

          /* tcp_receive() above may have flagged an in-order FIN: passive close. */
          if (recv_flags & TF_GOT_FIN) {
            tcp_ack_now(pcb);
            pcb->state = CLOSE_WAIT;
          }
        } else {
          /* incorrect ACK number, send RST */
          tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(),
                  ip_current_src_addr(), tcphdr->dest, tcphdr->src);
        }
      } else if ((flags & TCP_SYN) && (seqno == pcb->rcv_nxt - 1)) {
        /* Looks like another copy of the SYN - retransmit our SYN-ACK */
        tcp_rexmit(pcb);
      }
      break;
    case CLOSE_WAIT:
      /* FALLTHROUGH */
    case ESTABLISHED:
      tcp_receive(pcb);
      if (recv_flags & TF_GOT_FIN) { /* passive close */
        tcp_ack_now(pcb);
        pcb->state = CLOSE_WAIT;
      }
      break;
    case FIN_WAIT_1:
      tcp_receive(pcb);
      if (recv_flags & TF_GOT_FIN) {
        /* FIN received; if our own FIN is also fully ACKed, go straight
           to TIME_WAIT (simultaneous/ordered close), else CLOSING. */
        if ((flags & TCP_ACK) && (ackno == pcb->snd_nxt) &&
            pcb->unsent == NULL) {
          LWIP_DEBUGF(TCP_DEBUG,
                      ("TCP connection closed: FIN_WAIT_1 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
          tcp_ack_now(pcb);
          tcp_pcb_purge(pcb);
          TCP_RMV_ACTIVE(pcb);
          pcb->state = TIME_WAIT;
          TCP_REG(&tcp_tw_pcbs, pcb);
        } else {
          tcp_ack_now(pcb);
          pcb->state = CLOSING;
        }
      } else if ((flags & TCP_ACK) && (ackno == pcb->snd_nxt) &&
                 pcb->unsent == NULL) {
        pcb->state = FIN_WAIT_2;
      }
      break;
    case FIN_WAIT_2:
      tcp_receive(pcb);
      if (recv_flags & TF_GOT_FIN) {
        LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: FIN_WAIT_2 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
        tcp_ack_now(pcb);
        tcp_pcb_purge(pcb);
        TCP_RMV_ACTIVE(pcb);
        pcb->state = TIME_WAIT;
        TCP_REG(&tcp_tw_pcbs, pcb);
      }
      break;
    case CLOSING:
      tcp_receive(pcb);
      if ((flags & TCP_ACK) && ackno == pcb->snd_nxt && pcb->unsent == NULL) {
        LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: CLOSING %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
        tcp_pcb_purge(pcb);
        TCP_RMV_ACTIVE(pcb);
        pcb->state = TIME_WAIT;
        TCP_REG(&tcp_tw_pcbs, pcb);
      }
      break;
    case LAST_ACK:
      tcp_receive(pcb);
      if ((flags & TCP_ACK) && ackno == pcb->snd_nxt && pcb->unsent == NULL) {
        LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: LAST_ACK %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest));
        /* bugfix #21699: don't set pcb->state to CLOSED here or we risk leaking segments */
        recv_flags |= TF_CLOSED;
      }
      break;
    default:
      break;
  }
  return ERR_OK;
}
1267
#if TCP_QUEUE_OOSEQ
/**
 * Insert segment into the list (segments covered with new one will be deleted)
 *
 * Called from tcp_receive()
 */
static void
tcp_oos_insert_segment(struct tcp_seg *cseg, struct tcp_seg *next)
{
  struct tcp_seg *doomed;

  LWIP_ASSERT("tcp_oos_insert_segment: invalid cseg", cseg != NULL);

  if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
    /* The new segment carries a FIN, so it supersedes everything
       queued after it. */
    tcp_segs_free(next);
    next = NULL;
  } else {
    /* Drop queued segments that the new one fully covers; note the
       ooseq queue may hold segments carrying a FIN flag. */
    while ((next != NULL) &&
           TCP_SEQ_GEQ(seqno + cseg->len,
                       next->tcphdr->seqno + next->len)) {
      /* a FIN on a deleted segment is inherited by cseg */
      if (TCPH_FLAGS(next->tcphdr) & TCP_FIN) {
        TCPH_SET_FLAG(cseg->tcphdr, TCP_FIN);
      }
      doomed = next;
      next = next->next;
      tcp_seg_free(doomed);
    }
    if ((next != NULL) &&
        TCP_SEQ_GT(seqno + cseg->len, next->tcphdr->seqno)) {
      /* Partial overlap with the first surviving segment:
         trim the incoming segment back to its start. */
      cseg->len = (u16_t)(next->tcphdr->seqno - seqno);
      pbuf_realloc(cseg->p, cseg->len);
    }
  }
  cseg->next = next;
}
#endif /* TCP_QUEUE_OOSEQ */
1309
1310 #if LWIP_SACK
1311 /* Fucntion called when ACK for new data is received in FRLR state
1312 which is less than recovery_point */
1313 static void tcp_sack_proc_ack_wo_recovery(struct tcp_pcb *pcb)
1314 {
1315 /* Run SetPipe() */
1316 /* As per step B in section 5 of RFC 6675 */
1317 tcp_sack_set_pipe(pcb);
1318
1319 LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Pipe is %"U32_F
1320 " RecoveryPoint is %"U32_F" HighSacked is %"U32_F" HighData is %"U32_F
1321 " HighRxt is %"U32_F" RescueRxt is %"U32_F"\n", pcb->pipe, pcb->recovery_point,
1322 pcb->high_sacked, pcb->high_data, pcb->high_rxt, pcb->rescue_rxt));
1323
1324 /* As per step C of Section 5 of RFC 6675 */
1325 /* Going to do loss recovery algorithm */
1326 if (pcb->next_seg_for_lr) {
1327 tcp_sack_based_loss_recovery_alg(pcb);
1328 }
1329
1330 #if LWIP_SACK_PERF_OPT
1331 if (pcb->fr_segs) {
1332 tcp_sack_rexmit_lost_rexmitted(pcb);
1333 }
1334 #endif
1335
1336 return;
1337 }
1338
1339 /* Fucntion called when ACK for new data is received in FRLR state
1340 which is greater than equal to recovery_point */
1341 static void tcp_sack_proc_ack_with_recovery(struct tcp_pcb *pcb)
1342 {
1343 pcb->flags = (tcpflags_t)(pcb->flags & (~TF_IN_SACK_FRLR));
1344 pcb->next_seg_for_lr = NULL;
1345
1346 #if LWIP_SACK_PERF_OPT
1347 tcp_fr_segs_free(pcb->fr_segs);
1348 pcb->fr_segs = NULL;
1349 pcb->last_frseg = NULL;
1350 #endif
1351 #if LWIP_SACK_CWND_OPT
1352 pcb->cwnd = pcb->recover_cwnd;
1353 pcb->ssthresh = pcb->recover_ssthresh;
1354 pcb->recover_cwnd = 0;
1355 pcb->recover_ssthresh = 0;
1356 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: Restored cwnd to %"TCPWNDSIZE_F"\n", pcb->cwnd));
1357 #endif
1358 LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Disabled SACK loss recovery flag\n"));
1359 }
1360 #endif /* LWIP_SACK */
1361
/** Remove segments from a list if the incoming ACK acknowledges them
 *
 * Walks seg_list from the head, freeing every segment whose entire
 * sequence range (including SYN/FIN length, via TCP_TCPLEN) is at or
 * below the file-scope 'ackno' of the current incoming segment.
 * Updates pcb->snd_queuelen and accumulates the acked payload bytes
 * into the file-scope 'recv_acked'.
 *
 * @param pcb the pcb whose queues are being trimmed
 * @param seg_list head of the list to trim (pcb->unacked or pcb->unsent)
 * @param dbg_list_name list name for debug output only
 * @param dbg_other_seg_list the other of the two lists, used only for a
 *        queue-length sanity assertion
 * @return the new head of seg_list after removing acked segments
 */
static struct tcp_seg *
tcp_free_acked_segments(struct tcp_pcb *pcb, struct tcp_seg *seg_list, const char *dbg_list_name,
                        struct tcp_seg *dbg_other_seg_list)
{
  struct tcp_seg *next;
  u16_t clen;

  LWIP_UNUSED_ARG(dbg_list_name);
  LWIP_UNUSED_ARG(dbg_other_seg_list);

  while (seg_list != NULL &&
         TCP_SEQ_LEQ(lwip_ntohl(seg_list->tcphdr->seqno) +
                     TCP_TCPLEN(seg_list), ackno)) {
    LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: removing %"U32_F":%"U32_F" from pcb->%s\n",
                                  lwip_ntohl(seg_list->tcphdr->seqno),
                                  lwip_ntohl(seg_list->tcphdr->seqno) + TCP_TCPLEN(seg_list),
                                  dbg_list_name));

    /* detach the head segment before freeing it */
    next = seg_list;
    seg_list = seg_list->next;

#if LWIP_SACK
#if LWIP_SACK_PERF_OPT
    if (pcb->flags & TF_SACK) {
      /* If the loss-recovery cursor points at the segment being freed,
         advance it so it never dangles. */
      if (pcb->next_seg_for_lr == next) {
        pcb->next_seg_for_lr = pcb->next_seg_for_lr->next;
        LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Next segment is updated\n"));
      }

      /* Drop the matching entry from the fast-retransmitted bookkeeping
         list (only the head can reference the segment being freed). */
      if ((pcb->fr_segs != NULL) && (pcb->fr_segs->seg == next)) {
        struct tcp_sack_fast_rxmited *temp_seg = pcb->fr_segs;
        pcb->fr_segs = pcb->fr_segs->next;
        /* If the acked segment is the latest fast retransmitted segment,
           then update the last_frseg pointer to point to next latest unacked fr segment */
        if (pcb->last_frseg == temp_seg) {
          /* reads temp_seg->next before the mem_free below; may become NULL */
          pcb->last_frseg = pcb->last_frseg->next;
        }
        mem_free(temp_seg);
      }
    }
#else
    if ((pcb->flags & TF_SACK) && (pcb->next_seg_for_lr == next)) {
      pcb->next_seg_for_lr = pcb->next_seg_for_lr->next;
      LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Next segment is updated\n"));
    }
#endif

    /* keep the count of SACKed-but-unacked segments consistent */
    if ((pcb->flags & TF_SACK) && (next->state & TF_SEG_SACKED) && (pcb->sacked)) {
      pcb->sacked--;
    }
#endif

    clen = pbuf_clen(next->p);
    LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_receive: queuelen %"TCPWNDSIZE_F" ... ",
                                 (tcpwnd_size_t)pcb->snd_queuelen));
    LWIP_ASSERT("pcb->snd_queuelen >= pbuf_clen(next->p)", (pcb->snd_queuelen >= clen));

    pcb->snd_queuelen = (u16_t)(pcb->snd_queuelen - clen);
    /* NOTE(review): next->len counts payload only, while the loop condition
       uses TCP_TCPLEN (payload + SYN/FIN) — presumably intentional so that
       recv_acked reflects data bytes only; confirm against callers. */
    recv_acked = (tcpwnd_size_t)(recv_acked + next->len);
    tcp_seg_free(next);

    LWIP_DEBUGF(TCP_QLEN_DEBUG, ("%"TCPWNDSIZE_F" (after freeing %s)\n",
                                 (tcpwnd_size_t)pcb->snd_queuelen,
                                 dbg_list_name));
    if (pcb->snd_queuelen != 0) {
      LWIP_ASSERT("tcp_receive: valid queue length",
                  seg_list != NULL || dbg_other_seg_list != NULL);
    }
  }
  return seg_list;
}
1434
1435 /**
1436 * Called by tcp_process. Checks if the given segment is an ACK for outstanding
1437 * data, and if so frees the memory of the buffered data. Next, it places the
1438 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
1439 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
1440 * it has been removed from the buffer.
1441 *
1442 * If the incoming segment constitutes an ACK for a segment that was used for RTT
1443 * estimation, the RTT is estimated here as well.
1444 *
1445 * Called from tcp_process().
1446 */
1447 static void
1448 tcp_receive(struct tcp_pcb *pcb)
1449 {
1450 s32_t m;
1451 u32_t right_wnd_edge;
1452 int found_dupack = 0;
1453 #if LWIP_SACK
1454 u32_t new_sacked = 0; /* This variable intrinsically means that new_sack_block has arrived */
1455 #endif
1456 u32_t rcv_wup;
1457
1458 #if (LWIP_TCP_ER_SUPPORT || LWIP_TCP_TLP_SUPPORT)
1459 u32_t wnd;
1460 #endif
1461 #if LWIP_TCP_ER_SUPPORT
1462 struct tcp_seg *unacked_seg = NULL;
1463 u32_t unacked_cnt = 0;
1464 u8_t need_er;
1465 #endif
1466 tcpwnd_size_t outstanding_segments;
1467 LWIP_ASSERT("tcp_receive: invalid pcb", pcb != NULL);
1468 LWIP_ASSERT("tcp_receive: wrong state", pcb->state >= ESTABLISHED);
1469
1470 /* In established state or above, ACK must be present */
1471 if (!(flags & TCP_ACK)) {
1472 return;
1473 }
1474
1475 /* Handle segment with Invalid sequence number */
1476 /* Filter out invalid sequence */
1477 if ((tcplen > 0) && (pcb->state < CLOSE_WAIT)) {
1478 if (!TCP_SEQ_BETWEEN(pcb->rcv_nxt, seqno + 1, seqno + tcplen - 1)) {
1479 if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
1480 /* the whole segment is < rcv_nxt */
1481 /* must be a duplicate of a packet that has already been correctly handled */
1482 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: duplicate seqno %"U32_F"\n", seqno));
1483 tcp_ack_now(pcb);
1484 }
1485 }
1486
1487 /* Ideally, rcv_wup should be the value of rcv_nxt when an ACK is sent last time. But there are
1488 * no such fields kept in struct pcb, hence the lower-limit estimate is made by rewinding the
1489 * current recv window, but no less than 4 times of rcv_mss */
1490 rcv_wup = pcb->rcv_nxt - LWIP_MAX(pcb->rcv_mss * 4, pcb->rcv_wnd);
1491 if (TCP_SEQ_LT(seqno + tcplen, rcv_wup) || TCP_SEQ_GT(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
1492 /* seqno is too old or to new to be plausible, send an immediate ACK and drop the segment */
1493 (void)tcp_send_empty_ack(pcb);
1494 return;
1495 }
1496 } else {
1497 /* Segments with length 0 is taken care of here. Segments that
1498 fall out of the window are ACKed. */
1499 if (!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd - 1)) {
1500 /* Ignore the rest of packets */
1501 tcp_ack_now(pcb);
1502 return;
1503 }
1504 }
1505
1506 /* Data from segments acking unsent data is not dropped */
1507 if (TCP_SEQ_GT(ackno, pcb->snd_nxt)) {
1508 /* Check for states */
1509 if ((pcb->state != CLOSE_WAIT) && (pcb->state != CLOSING) && (pcb->state != LAST_ACK)) {
1510 (void)tcp_send_empty_ack(pcb);
1511 }
1512 return;
1513 }
1514 if (flags & TCP_ACK) {
1515 right_wnd_edge = pcb->snd_wnd + pcb->snd_wl2;
1516
1517 /* Update window. */
1518 if (TCP_SEQ_LT(pcb->snd_wl1, seqno) ||
1519 (pcb->snd_wl1 == seqno && TCP_SEQ_LT(pcb->snd_wl2, ackno)) ||
1520 (pcb->snd_wl2 == ackno && (u32_t)SND_WND_SCALE(pcb, tcphdr->wnd) > pcb->snd_wnd)) {
1521 pcb->snd_wnd = SND_WND_SCALE(pcb, tcphdr->wnd);
1522 /* keep track of the biggest window announced by the remote host to calculate
1523 the maximum segment size */
1524 if (pcb->snd_wnd_max < pcb->snd_wnd) {
1525 pcb->snd_wnd_max = pcb->snd_wnd;
1526 }
1527 pcb->snd_wl1 = seqno;
1528 pcb->snd_wl2 = ackno;
1529 if (pcb->snd_wnd == 0) {
1530 if (pcb->persist_backoff == 0) {
1531 /* start persist timer */
1532 pcb->persist_cnt = 0;
1533 pcb->persist_backoff = 1;
1534 pcb->persist_probe = 0;
1535 }
1536 } else if (pcb->persist_backoff > 0) {
1537 /* stop persist timer */
1538 pcb->persist_backoff = 0;
1539 }
1540 LWIP_DEBUGF(TCP_WND_DEBUG, ("tcp_receive: window update %"TCPWNDSIZE_F"\n", pcb->snd_wnd));
1541 #if TCP_WND_DEBUG
1542 } else {
1543 if (pcb->snd_wnd != (tcpwnd_size_t)SND_WND_SCALE(pcb, tcphdr->wnd)) {
1544 LWIP_DEBUGF(TCP_WND_DEBUG,
1545 ("tcp_receive: no window update lastack %"U32_F" ackno %"
1546 U32_F" wl1 %"U32_F" seqno %"U32_F" wl2 %"U32_F"\n",
1547 pcb->lastack, ackno, pcb->snd_wl1, seqno, pcb->snd_wl2));
1548 }
1549 #endif /* TCP_WND_DEBUG */
1550 }
1551
1552 #if LWIP_SACK
1553 if (pcb->flags & TF_SACK) {
1554 #if !LWIP_SACK_DATA_SEG_PIGGYBACK
1555 if (tcplen == 0)
1556 #endif
1557 {
1558 if (pcb->num_sacks) {
1559 /* The return value intrinsically means that new_sack_block has arrived */
1560 new_sacked = tcp_sack_update(pcb, ackno);
1561 pcb->num_sacks = 0;
1562 }
1563 }
1564
1565 #if LWIP_TCP_TLP_SUPPORT
1566 /* draft-dukkipati-tcpm-tcp-loss-probe-01: 3.2. TLP Loss Detection: Algorithm Details.
1567
1568 If the only loss was the last segment, there is the risk that the loss probe itself might repair
1569 the loss, effectively masking it from congestion control. To avoid interfering with mandatory
1570 congestion control [RFC5681] it is imperative that TLP include a mechanism to detect when
1571 the probe might have masked a loss and to properly reduce the congestion window (cwnd)
1572
1573 Scenario:
1574
1575 number of scoreboard after
1576 losses TLP retrans ACKed mechanism final outcome
1577 -------- ----------------- ----------------- -------------
1578 (1) AAAL AAAA TLP loss detection all repaired
1579
1580 As above, with one segment lost, the TLP loss probe itself will repair the loss. In this case,
1581 the sender's TLP loss detection algorithm will notice that a segment was lost and repaired,
1582 and reduce its congestion window in response to the loss.
1583 */
1584 /*
1585 (a) TLPRtxOut > 0
1586 (b) SEG.ACK == TLPHighRxt
1587 (c) the segment contains no SACK blocks for sequence ranges above TLPHighRxt
1588 (d) the ACK does not advance SND.UNA
1589 (e) the segment contains no data
1590 (f) the segment is not a window update
1591 */
1592 if ((pcb->tlp_rtx_out > 0) && (ackno == pcb->tlp_high_rxt) && (new_sacked == 0) && (tcplen == 0)) {
1593 pcb->tlp_rtx_out--;
1594 }
1595
1596 /* ACK advances: there was a loss, so reduce cwnd. */
1597 if (pcb->tlp_rtx_out && TCP_SEQ_GEQ(ackno, pcb->tlp_high_rxt)) {
1598 /* very aggresive, actually we should halve the cwnd as TLP fixes the tail packet loss */
1599 TCP_WND_DEC(pcb->cwnd, pcb->mss, pcb->mss);
1600 pcb->tlp_rtx_out = 0;
1601 }
1602 #endif /* LWIP_TCP_TLP_SUPPORT */
1603 }
1604 #endif /* LWIP_SACK */
1605
1606 /* RFC 5827.
1607 Section: 3.2. Segment-Based Early Retransmit
1608
1609 Upon the arrival of an ACK, a sender employing segment-based Early
1610 Retransmit MUST use the following two conditions to determine when an
1611 Early Retransmit is sent:
1612 (3.a) The number of outstanding segments (oseg) -- segments sent but
1613 not yet acknowledged -- is less than four.
1614
1615 (3.b) There is either no unsent data ready for transmission at the
1616 sender, or the advertised receive window does not permit new
1617 segments to be transmitted.
1618
1619 ACTION_WITHOUT_SACK:
1620 When the above two conditions hold and a TCP connection does not
1621 support SACK, the duplicate ACK threshold used to trigger a
1622 retransmission MUST be reduced to:
1623 ER_thresh = oseg - 1
1624
1625 ACTION_WITH_SACK:
1626 When conditions (3.a) and (3.b) hold and a TCP connection does
1627 support SACK or SCTP is in use, Early Retransmit MUST be used only
1628 when "oseg - 1" segments have been SACKed. A segment is considered
1629 to be SACKed when all of its data bytes (TCP) or data chunks (SCTP)
1630 have been indicated as arrived by the receiver.
1631 */
1632 #if LWIP_TCP_ER_SUPPORT
1633 need_er = 1;
1634
1635 /* Ensure (3.b) */
1636 if (pcb->unsent) {
1637 wnd = LWIP_MIN(pcb->snd_wnd, pcb->cwnd);
1638 /* Just need to check the first segment */
1639 if (((ntohl(pcb->unsent->tcphdr->seqno) - pcb->lastack) + pcb->unsent->len) <= wnd) {
1640 need_er = 0;
1641 }
1642 }
1643
1644 /* Ensure (3.a) */
1645 if (need_er) {
1646 for (unacked_seg = pcb->unacked; unacked_seg != NULL; unacked_seg = unacked_seg->next) {
1647 unacked_cnt++;
1648 if (unacked_cnt > DUPACK_THRESH) {
1649 need_er = 0;
1650 break;
1651 }
1652 }
1653 }
1654 #endif /* LWIP_TCP_ER_SUPPORT */
1655
1656 /* (From Stevens TCP/IP Illustrated Vol II, p970.) Its only a
1657 * duplicate ack if:
1658 * 1) It doesn't ACK new data
1659 * 2) length of received packet is zero (i.e. no payload)
1660 * 3) the advertised window hasn't changed
1661 * 4) There is outstanding unacknowledged data (retransmission timer running)
1662 * 5) The ACK is == biggest ACK sequence number so far seen (snd_una)
1663 *
1664 * If it passes all five, should process as a dupack:
1665 * a) dupacks < 3: do nothing
1666 * b) dupacks == 3: fast retransmit
1667 * c) dupacks > 3: increase cwnd
1668 *
1669 * If it only passes 1-3, should reset dupack counter (and add to
1670 * stats, which we don't do in lwIP)
1671 *
1672 * If it only passes 1, should reset dupack counter
1673 *
1674 */
1675
1676 /* Clause 1 */
1677 #if LWIP_SACK
1678 if ((TCP_SEQ_LEQ(ackno, pcb->lastack)) && (!(pcb->flags & TF_SACK)))
1679 #else
1680 if (TCP_SEQ_LEQ(ackno, pcb->lastack))
1681 #endif
1682 {
1683 /* Clause 2 */
1684 if (tcplen == 0) {
1685 /* Clause 3 */
1686 if (pcb->snd_wl2 + pcb->snd_wnd == right_wnd_edge) {
1687 /* Clause 4 */
1688 if (pcb->rtime >= 0) {
1689 /* Clause 5 */
1690 if (pcb->lastack == ackno) {
1691 found_dupack = 1;
1692 if ((u8_t)(pcb->dupacks + 1) > pcb->dupacks) {
1693 ++pcb->dupacks;
1694 }
1695 if (pcb->dupacks > DUPACK_THRESH) {
1696 /*
1697 4. For each additional duplicate ACK received (after the third),
1698 cwnd MUST be incremented by SMSS. This artificially inflates the
1699 congestion window in order to reflect the additional segment that
1700 has left the network.
1701
1702 Here, assuming atleast one packet is outstanding, hence limiting wnd increase to dupacks -1
1703 */
1704 outstanding_segments = (tcpwnd_size_t)(pcb->snd_nxt - pcb->lastack);
1705 if (outstanding_segments > (tcpwnd_size_t)((pcb->dupacks - 1) * pcb->mss)) {
1706 /* Inflate the congestion window */
1707 TCP_WND_INC(pcb->cwnd, pcb->mss);
1708 }
1709 } else if ((pcb->dupacks == DUPACK_THRESH)
1710 #if LWIP_TCP_ER_SUPPORT /* ACTION_WITHOUT_SACK for Early Retransmit */
1711 || (need_er && unacked_cnt && (pcb->dupacks == (unacked_cnt - 1)))
1712 #endif /* LWIP_TCP_ER_SUPPORT */
1713 ) {
1714 /* Do fast retransmit */
1715 tcp_rexmit_fast(pcb);
1716 }
1717 }
1718 }
1719 }
1720 }
1721 /* If Clause (1) or more is true, but not a duplicate ack, reset
1722 * count of consecutive duplicate acks */
1723 if (!found_dupack) {
1724 pcb->dupacks = 0;
1725 }
1726 }
1727 #if LWIP_SACK
1728 else if ((TCP_SEQ_LEQ(ackno, pcb->lastack)) && (pcb->flags & TF_SACK)) {
1729 /* As per RFC 6675 */
1730 /* Duplicate acknowledgment is a segment that arrives carrying a SACK */
1731 /* block that identifies previously unacknowledged and un-SACKed octets */
1732 /* between HighACK and HighData. */
1733 #if !LWIP_SACK_DATA_SEG_PIGGYBACK
1734 if (tcplen == 0)
1735 #endif
1736 {
1737 if (new_sacked) {
1738 if ((u8_t)(pcb->dupacks + 1) > pcb->dupacks) {
1739 pcb->dupacks++;
1740 LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive: Incremented dupack "
1741 "%"U32_F"\n", pcb->dupacks));
1742 }
1743
1744 /* As per RFC 6675, Section 5, if the incoming ACK is cumulative */
1745 /* acknowledgement then need to reset DupAcks to zero. */
1746 /* Even if app data seg from peer increments the cumulative ACK, */
1747 /* we are resetting the dupack to 0 in next else if case. */
1748 if (pcb->unacked != NULL) {
1749 if ((pcb->dupacks >= DUPACK_THRESH)
1750 #if LWIP_TCP_ER_SUPPORT /* ACTION_WITH_SACK for Early Retransmit */
1751 /* Basic Early retransmit proposed in RFC 5827 */
1752 || (need_er && (pcb->sacked == (unacked_cnt - 1)))
1753 #endif /* LWIP_TCP_ER_SUPPORT */
1754 #if (LWIP_TCP_TLP_SUPPORT && LWIP_TCP_ER_SUPPORT)
1755 /* Enhanced Early retransmit proposed in TLP draft-dukkipati-tcpm-tcp-loss-probe-01 */
1756 || (need_er && ((unacked_cnt - 1) > pcb->sacked))
1757 #endif /* (LWIP_TCP_TLP_SUPPORT && LWIP_TCP_ER_SUPPORT) */
1758 || ((pcb->dupacks < DUPACK_THRESH) && tcp_sack_is_lost(pcb, pcb->unacked))) {
1759 LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive: Going to start SACK based fast "
1760 "retransmit and loss recovery phase, dupack is %"U32_F"\n", pcb->dupacks));
1761 /* Do fast retransmit as per point 1 and 2 in Section 5 of RFC 6675 */
1762 /* 1. If dupacks >= Threshold then start fast retransmit and loss recovery */
1763 /* 2. Or if IsLost (HighACK + 1) returns true then there is loss */
1764 /* need to enter fast retransmit and loss recovery phase */
1765 tcp_sack_based_fast_rexmit_alg(pcb);
1766
1767 if (pcb->flags & TF_IN_SACK_FRLR) {
1768 /* Run SetPipe() */
1769 /* As per point 4.4 in section 5 of RFC 6675 */
1770 tcp_sack_set_pipe(pcb);
1771 }
1772
1773 /* Then as per point 4.5, enter step C of Section 5 of RFC 6675 */
1774 /* Going to do loss recovery algorithm */
1775 #if LWIP_SACK_PERF_OPT
1776 if ((pcb->flags & TF_IN_SACK_FRLR))
1777 #else
1778 if ((pcb->flags & TF_IN_SACK_FRLR) && (pcb->next_seg_for_lr))
1779 #endif /* LWIP_SACK_PERF_OPT */
1780 {
1781 tcp_sack_based_loss_recovery_alg(pcb);
1782 }
1783 #if LWIP_TCP_TLP_SUPPORT
1784 if (pcb->tlp_time_stamp) {
1785 LWIP_TCP_TLP_CLEAR_VARS(pcb);
1786 if ((pcb->unacked != NULL) && (pcb->rtime == -1)) {
1787 pcb->rtime = 0;
1788 }
1789 }
1790 #endif /* LWIP_TCP_TLP_SUPPORT */
1791 #if LWIP_SACK_PERF_OPT
1792 if (pcb->dupacks > DUPACK_THRESH) {
1793 /* Inflate the congestion window, but not if it means that the value overflows. */
1794 /* Adding for WINDOW SCALE */
1795 {
1796 if (pcb->cwnd < pcb->ssthresh) {
1797 /* Adding for WINDOW SCALE */
1798 if ((tcpwnd_size_t)(pcb->cwnd + pcb->mss) > pcb->cwnd) {
1799 pcb->cwnd += pcb->mss;
1800 }
1801 /* Adding for WINDOW SCALE */
1802 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive:dupack slow start cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
1803 } else {
1804 /* Loss of precision (initialization) (31 bits to 16 bits) */
1805 /* Adding for WINDOW SCALE */
1806 tcpwnd_size_t new_cwnd = (tcpwnd_size_t)(pcb->cwnd +
1807 (u32_t)pcb->mss * (u32_t)pcb->mss / pcb->cwnd);
1808 if (new_cwnd > pcb->cwnd) {
1809 pcb->cwnd = new_cwnd;
1810 }
1811 /* Adding for WINDOW SCALE */
1812 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive:dupack congestion avoidance cwnd %"TCPWNDSIZE_F"\n",
1813 pcb->cwnd));
1814 }
1815 }
1816 }
1817 #endif /* LWIP_SACK_PERF_OPT */
1818 }
1819 }
1820 }
1821 }
1822 }
1823 #endif
1824 else if (TCP_SEQ_BETWEEN(ackno, pcb->lastack + 1, pcb->snd_nxt)) {
1825 /* We come here when the ACK acknowledges new data. */
1826 tcpwnd_size_t acked;
1827 u8_t is_infr = 0;
1828
1829 /* We come here when the ACK acknowledges new data. */
1830 /* Record how much data this ACK acks */
1831 acked = (tcpwnd_size_t)(ackno - pcb->lastack);
1832
1833 /* Reset the "IN Fast Retransmit" flag, since we are no longer
1834 in fast retransmit. Also reset the congestion window to the
1835 slow start threshold. */
1836 if (pcb->flags & TF_INFR) {
1837 is_infr = 1;
1838 if (TCP_SEQ_LT(ackno, pcb->fast_recovery_point)) {
1839 /* Partial ACK recved, retransmit next unacked segment */
1840 tcp_set_flags(pcb, TF_INFR_PACK);
1841
1842 /*
1843 [RFC 6582] Section-3.2
1844 For the first partial ACK that arrives during fast recovery, also
1845 reset the retransmit timer.
1846 If a large number of packets were dropped from a window of
1847 data, the TCP data sender's retransmit timer will ultimately expire,
1848 and the TCP data sender will invoke Slow-Start. We call this the Impatient variant of NewReno.
1849
1850 When N packets have been dropped from a window of data for a large
1851 value of N, the Slow-but-Steady variant can remain in Fast Recovery
1852 for N round-trip times, retransmitting one more dropped packet each
1853 round-trip time; for these scenarios, the Impatient variant gives a
1854 faster recovery and better performance.
1855
1856 */
1857 if (tcp_is_flag_set(pcb, TF_INFR_FPACK)) {
1858 /* Reset the retransmission timer to prevent immediate rto retransmissions */
1859 pcb->rtime = 0;
1860 tcp_clear_flags(pcb, TF_INFR_FPACK);
1861 }
1862
1863 if (acked < pcb->mss) {
1864 TCP_WND_DEC(pcb->cwnd, acked, pcb->mss);
1865 }
1866 } else {
1867 /*
1868 [RFC 5681] : Section 3.2. Fast Retransmit/Fast Recovery
1869 6. When the next ACK arrives that acknowledges previously
1870 unacknowledged data, a TCP MUST set cwnd to ssthresh (the value
1871 set in step 2). This is termed "deflating" the window.
1872
1873 Rationale:
1874 In [RFC3782], the cwnd after Full ACK reception will be set to
1875 (1) min (ssthresh, FlightSize + SMSS) or (2) ssthresh. However, the
1876 first option carries a risk of performance degradation: With the
1877 first option, if FlightSize is zero, the result will be 1 SMSS. This
1878 means TCP can transmit only 1 segment at that moment, which can cause
1879 a delay in ACK transmission at the receiver due to a delayed ACK
1880 algorithm.
1881 The FlightSize on Full ACK reception can be zero in some situations.
1882 A typical example is where the sending window size during fast
1883 recovery is small. In this case, the retransmitted packet and new
1884 data packets can be transmitted within a short interval. If all
1885 these packets successfully arrive, the receiver may generate a Full
1886 ACK that acknowledges all outstanding data. Even if the window size
1887 is not small, loss of ACK packets or a receive buffer shortage during
1888 fast recovery can also increase the possibility of falling into this
1889 situation.
1890
1891 [RFC 6582] Section 3.2
1892 Full acknowledgments:
1893 If this ACK acknowledges all of the data up to and including
1894 recover, then the ACK acknowledges all the intermediate segments
1895 sent between the original transmission of the lost segment and
1896 the receipt of the third duplicate ACK. Set cwnd to either (1)
1897 min (ssthresh, max(FlightSize, SMSS) + SMSS) or (2) ssthresh,
1898 where ssthresh is the value set when fast retransmit was entered,
1899 and where FlightSize in (1) is the amount of data presently
1900 outstanding.
1901
1902 (2) ssthresh,-- Not implemented, because lwIP has not implemented mechanism to avoid burst of data
1903 RFC - Advice, If the
1904 second option is selected, the implementation is encouraged to
1905 take measures to avoid a possible burst of data, in case the
1906 amount of data outstanding in the network is much less than the
1907 new congestion window allows.
1908 */
1909 tcp_clear_flags(pcb, TF_INFR);
1910 outstanding_segments = (tcpwnd_size_t)(pcb->snd_nxt - pcb->lastack);
1911 outstanding_segments = LWIP_MAX(outstanding_segments, pcb->mss);
1912 TCP_WND_INC(outstanding_segments, pcb->mss);
1913 pcb->cwnd = LWIP_MIN(pcb->ssthresh, outstanding_segments);
1914
1915 /* when exiting mark the recovery point */
1916 pcb->fast_recovery_point = ackno;
1917 pcb->bytes_acked = 0;
1918 }
1919 }
1920
1921 #if !LWIP_SACK_PERF_OPT
1922 #if LWIP_SACK
1923 if (pcb->flags & TF_IN_SACK_FRLR) {
1924 if (TCP_SEQ_LT(ackno, pcb->recovery_point)) {
1925 tcp_sack_proc_ack_wo_recovery(pcb);
1926 } else {
1927 tcp_sack_proc_ack_with_recovery(pcb);
1928 }
1929 }
1930 if ((pcb->flags & TF_IN_SACK_RTO != 0) && (TCP_SEQ_GEQ(ackno, pcb->recovery_point))) {
1931 pcb->flags = (tcpflags_t)(pcb->flags & (~TF_IN_SACK_RTO));
1932 LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Disabled SACK retransmit timeout flag\n"));
1933 }
1934 #endif /* LWIP_SACK */
1935 #endif /* LWIP_SACK_PERF_OPT */
1936
1937 /*
1938 Reset RTO backoff when new data was ACKed during RTO phase. As lwIP violates RFC5861
1939 section 3.1, when RTO happened, lwIP tries to retransmit all unacked segments. If any
1940 retransmitted segment was ACKed, it means RTO retransmission succeed, and RTO backoff
1941 could be cleared. A new RTO timeout value was calculated by the stale SRTT and RTTVAR.
1942 It seems that this way is aggressive, but in order to achieve better throughput under high packet
1943 lossy environment, we adopt this processing way temporarily.
1944 */
1945 if (pcb->nrtx) {
1946 pcb->nrtx = 0;
1947 if (pcb->sa != -1) {
1948 pcb->rto = (s16_t)((((u32_t)pcb->sa >> 3) + pcb->sv) / TCP_SLOW_INTERVAL);
1949 pcb->rto = (s16_t)(LWIP_MIN(TCP_MAX_RTO_TICKS, LWIP_MAX(TCP_MIN_RTO_TICKS, pcb->rto)));
1950 } else {
1951 /* if RTO happened for 1st data segment, just use the initial RTO value. */
1952 pcb->rto = TCP_INITIAL_RTO_DURATION / TCP_SLOW_INTERVAL;
1953 }
1954 }
1955
1956
1957 /* Record how much data this ACK acks */
1958 acked = (tcpwnd_size_t)(ackno - pcb->lastack);
1959
1960 /* Reset the fast retransmit variables. */
1961 pcb->dupacks = 0;
1962 pcb->lastack = ackno;
1963
1964 #if LWIP_FACK_THRESHOLD_BASED_FR
1965 pcb->fack = ackno;
1966 #endif /* LWIP_FACK_THRESHOLD_BASED_FR */
1967
1968 /* Update the congestion control variables (cwnd and ssthresh). */
1969 #if LWIP_SACK_CWND_OPT
1970 if ((pcb->flags & TF_IN_SACK_FRLR) && (TCP_SEQ_GEQ(ackno, pcb->recovery_point))) {
1971 tcp_sack_proc_ack_with_recovery(pcb);
1972 } /* else case handled below as it requires the cwnd value updated below */
1973 #endif
1974
      /* RFC 6582 specifies a specific window update algorithm for the FR state */
1976 if ((pcb->state >= ESTABLISHED) && !is_infr) {
1977 if (pcb->cwnd < pcb->ssthresh) {
1978 tcpwnd_size_t increase;
1979 /* limit to 1 SMSS segment during period following RTO */
1980 u8_t num_seg = (pcb->flags & TF_RTO) ? 1 : 2;
1981
1982 /*
1983 RFC 5681 Section 3.1 Slow Start
1984 During slow start, a TCP increments cwnd by at most SMSS bytes for
1985 each ACK received that cumulatively acknowledges new data. Slow
1986 start ends when cwnd exceeds ssthresh (or, optionally, when it
1987 reaches it, as noted above) or when congestion is observed. While
1988 traditionally TCP implementations have increased cwnd by precisely
1989 SMSS bytes upon receipt of an ACK covering new data, we RECOMMEND
1990 that TCP implementations increase cwnd, per:
1991
1992 cwnd += min (N, SMSS) (2)
1993
1994 where N is the number of previously unacknowledged bytes acknowledged
1995 in the incoming ACK. This adjustment is part of Appropriate Byte
1996 Counting [RFC3465] and provides robustness against misbehaving
1997 receivers that may attempt to induce a sender to artificially inflate
1998 cwnd using a mechanism known as "ACK Division" [SCWA99]. ACK
1999 Division consists of a receiver sending multiple ACKs for a single
2000 TCP data segment, each acknowledging only a portion of its data. A
2001 TCP that increments cwnd by SMSS for each such ACK will
2002 inappropriately inflate the amount of data injected into the network.
2003 */
2004 /* RFC 3465, section 2.2 Slow Start */
2005 increase = LWIP_MIN(acked, (tcpwnd_size_t)(num_seg * pcb->mss));
2006 TCP_WND_INC(pcb->cwnd, increase);
2007 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: slow start cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
2008 } else {
2009 /* RFC 3465, section 2.1 Congestion Avoidance */
2010 TCP_WND_INC(pcb->bytes_acked, acked);
2011 if (pcb->bytes_acked >= pcb->cwnd) {
2012 pcb->bytes_acked = (tcpwnd_size_t)(pcb->bytes_acked - pcb->cwnd);
2013 TCP_WND_INC(pcb->cwnd, pcb->mss);
2014 }
2015 LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_receive: congestion avoidance cwnd %"TCPWNDSIZE_F"\n", pcb->cwnd));
2016 }
2017 }
2018 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: ACK for %"U32_F", unacked->seqno %"U32_F":%"U32_F"\n",
2019 ackno,
2020 pcb->unacked != NULL ?
2021 lwip_ntohl(pcb->unacked->tcphdr->seqno) : 0,
2022 pcb->unacked != NULL ?
2023 lwip_ntohl(pcb->unacked->tcphdr->seqno) + TCP_TCPLEN(pcb->unacked) : 0));
2024
2025 /* Remove segment from the unacknowledged list if the incoming
2026 ACK acknowledges them. */
2027 pcb->unacked = tcp_free_acked_segments(pcb, pcb->unacked, "unacked", pcb->unsent);
2028 /* We go through the ->unsent list to see if any of the segments
2029 on the list are acknowledged by the ACK. This may seem
2030 strange since an "unsent" segment shouldn't be acked. The
2031 rationale is that lwIP puts all outstanding segments on the
2032 ->unsent list after a retransmission, so these segments may
2033 in fact have been sent once. */
2034 pcb->unsent = tcp_free_acked_segments(pcb, pcb->unsent, "unsent", pcb->unacked);
2035
2036 if (pcb->flags & TF_INFR_PACK) {
2037 /* Partial ACK recved, fast retransmit next unack segment */
2038 LWIP_DEBUGF(TCP_FR_DEBUG,
2039 ("tcp_receive: dupacks %"U16_F" (%"U32_F"), fast retransmit %"U32_F"\n",
2040 (u16_t)pcb->dupacks, (u32_t)pcb->lastack, (u32_t)ntohl(pcb->unacked->tcphdr->seqno)));
2041 (void)tcp_rexmit(pcb);
2042 pcb->flags = (tcpflags_t)(pcb->flags & (~TF_INFR_PACK));
2043 }
2044
2045 #if LWIP_SACK_PERF_OPT
2046 if (pcb->flags & TF_IN_SACK_FRLR) {
2047 if (TCP_SEQ_LT(ackno, pcb->recovery_point)) {
2048 tcp_sack_proc_ack_wo_recovery(pcb);
2049 }
2050 #if !LWIP_SACK_CWND_OPT
2051 /* else will not be hit here as the FRLR flags is disabled above in this case */
2052 else {
2053 tcp_sack_proc_ack_with_recovery(pcb);
2054 }
2055 #endif
2056 }
2057 if (((pcb->flags & TF_IN_SACK_RTO) != 0) && (TCP_SEQ_GEQ(ackno, pcb->recovery_point))) {
2058 pcb->flags = (tcpflags_t)(pcb->flags & (~TF_IN_SACK_RTO));
2059 LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Disabled SACK retransmit timeout flag\n"));
2060 }
2061 #endif /* LWIP_SACK_PERF_OPT */
2062
2063 /* If there's nothing left to acknowledge, stop the retransmit
2064 timer, otherwise reset it to start again */
2065 if (pcb->unacked == NULL) {
2066 pcb->rtime = -1;
2067 } else {
2068 pcb->rtime = 0;
2069 }
2070
2071 pcb->polltmr = 0;
2072
2073 #if TCP_OVERSIZE
2074 if (pcb->unsent == NULL) {
2075 pcb->unsent_oversize = 0;
2076 }
2077 #endif /* TCP_OVERSIZE */
2078
2079 #if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS
2080 if (ip_current_is_v6()) {
2081 /* Inform neighbor reachability of forward progress. */
2082 nd6_reachability_hint(ip6_current_src_addr());
2083 }
2084 #endif /* LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS*/
2085
2086 pcb->snd_buf += recv_acked;
2087 /* check if this ACK ends our retransmission of in-flight data */
2088 if (pcb->flags & TF_RTO) {
2089 /* RTO is done if
2090 1) both queues are empty or
2091 2) unacked is empty and unsent head contains data not part of RTO or
2092 3) unacked head contains data not part of RTO */
2093 if (pcb->unacked == NULL) {
2094 if ((pcb->unsent == NULL) ||
2095 (TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unsent->tcphdr->seqno)))) {
2096 tcp_clear_flags(pcb, TF_RTO);
2097 }
2098 } else if (TCP_SEQ_LEQ(pcb->rto_end, lwip_ntohl(pcb->unacked->tcphdr->seqno))) {
2099 tcp_clear_flags(pcb, TF_RTO);
2100 }
2101 }
2102 /* End of ACK for new data processing. */
2103 } else {
2104 /* Out of sequence ACK, didn't really ack anything */
2105 tcp_send_empty_ack(pcb);
2106 }
2107
2108 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: pcb->rttest %"U32_F" rtseq %"U32_F" ackno %"U32_F"\n",
2109 pcb->rttest, pcb->rtseq, ackno));
2110
2111 /* RTT estimation calculations. This is done by checking if the
2112 incoming segment acknowledges the segment we use to take a
2113 round-trip time measurement. */
2114 if (pcb->rttest && TCP_SEQ_LT(pcb->rtseq, ackno)) {
2115 m = (s32_t)(sys_now() - pcb->rttest);
2116
2117 if (pcb->sa != -1) {
2118 /*
2119 RFC 6298 section 2.3
2120 When a subsequent RTT measurement R is made, a host MUST set:
2121 RTTVAR <- (1 - beta) * RTTVAR + beta * |SRTT - R|;
2122 SRTT <- (1 - alpha) * SRTT + alpha * R;
2123 RTO <- SRTT + max (G, K*RTTVAR);
2124 where alpha = 1/8, beta = 1/4;
2125 The value of SRTT used in the update to RTTVAR is its value
2126 before updating SRTT itself using the second assignment. That
2127 is, updating RTTVAR and SRTT MUST be computed in the above order.
2128 */
2129 m = (s32_t)((u32_t)m - ((u32_t)pcb->sa >> 3));
2130 pcb->sa = (s32_t)(pcb->sa + m);
2131 if (m < 0) {
2132 m = (s32_t)-m;
2133 }
2134 m = (s32_t)(m - ((u32_t)pcb->sv >> 2));
2135 pcb->sv = (s32_t)(pcb->sv + m);
2136 } else {
2137 /*
2138 RFC 6298 section 2.2
2139 When the first RTT measurement R is made, the host MUST set:
2140 SRTT <- R
2141 RTTVAR <- R/2
2142 RTO <- SRTT + max (G, K*RTTVAR), where K = 4.
2143 */
2144 pcb->sa = m << 3;
2145 pcb->sv = m << 1;
2146 }
2147
2148 pcb->rto = (s16_t)((((u16_t)pcb->sa >> 3) + pcb->sv) / TCP_SLOW_INTERVAL);
2149 /*
2150 RFC 6298 section 2.5
2151 A maximum value MAY be placed on RTO provided it is at least 60 seconds.
2152 */
2153 pcb->rto = (s16_t)(LWIP_MIN(TCP_MAX_RTO_TICKS, LWIP_MAX(TCP_MIN_RTO_TICKS, pcb->rto)));
2154
2155 LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_receive: RTO %"U16_F" (%"U16_F" milliseconds)\n",
2156 pcb->rto, (u16_t)(pcb->rto * TCP_SLOW_INTERVAL)));
2157
2158 pcb->rttest = 0;
2159 }
2160 #if LWIP_TCP_TLP_SUPPORT
2161 /* draft-dukkipati-tcpm-tcp-loss-probe-01: During ACK processing */
2162 /* a) Cancel any existing PTO. */
2163 if (pcb->tlp_time_stamp) {
2164 LWIP_TCP_TLP_CLEAR_VARS(pcb);
2165 if ((pcb->unacked != NULL) && (pcb->rtime == -1)) {
2166 pcb->rtime = 0;
2167 }
2168 }
2169 /*
2170 b) If conditions for scheduling PTO allow:
2171 -> Reschedule PTO relative to the ACK receipt time.
2172 */
2173 wnd = LWIP_MIN(pcb->snd_wnd, pcb->cwnd);
2174 tcp_tlp_schedule_probe(pcb, wnd);
2175 #endif /* LWIP_TCP_TLP_SUPPORT */
2176 }
2177
2178 /* If the incoming segment contains data, we must process it
2179 further unless the pcb already received a FIN.
2180 (RFC 793, chapter 3.9, "SEGMENT ARRIVES" in states CLOSE-WAIT, CLOSING,
2181 LAST-ACK and TIME-WAIT: "Ignore the segment text.") */
2182 if ((tcplen > 0) && (pcb->state < CLOSE_WAIT)) {
2183 /* For sending SACK options along with data */
2184 #if LWIP_SACK_DATA_SEG_PIGGYBACK
2185 if (pcb->sack_seq != NULL) {
2186 do {
2187 struct _sack_seq *ptr = NULL;
2188 ptr = pcb->sack_seq->next;
2189 mem_free(pcb->sack_seq);
2190 pcb->sack_seq = ptr;
2191 } while (pcb->sack_seq != NULL);
2192 pcb->sack_seq = NULL;
2193 }
2194 #endif
2195
2196 /* This code basically does three things:
2197
2198 +) If the incoming segment contains data that is the next
2199 in-sequence data, this data is passed to the application. This
2200 might involve trimming the first edge of the data. The rcv_nxt
2201 variable and the advertised window are adjusted.
2202
2203 +) If the incoming segment has data that is above the next
2204 sequence number expected (->rcv_nxt), the segment is placed on
2205 the ->ooseq queue. This is done by finding the appropriate
2206 place in the ->ooseq queue (which is ordered by sequence
2207 number) and trim the segment in both ends if needed. An
2208 immediate ACK is sent to indicate that we received an
2209 out-of-sequence segment.
2210
2211 +) Finally, we check if the first segment on the ->ooseq queue
2212 now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
2213 rcv_nxt > ooseq->seqno, we must trim the first edge of the
2214 segment on ->ooseq before we adjust rcv_nxt. The data in the
2215 segments that are now on sequence are chained onto the
2216 incoming segment so that we only need to call the application
2217 once.
2218 */
2219
2220 /* First, we check if we must trim the first edge. We have to do
2221 this if the sequence number of the incoming segment is less
2222 than rcv_nxt, and the sequence number plus the length of the
2223 segment is larger than rcv_nxt. */
2224 /* if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
2225 if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {*/
2226 if (TCP_SEQ_BETWEEN(pcb->rcv_nxt, seqno + 1, seqno + tcplen - 1)) {
2227 /* Trimming the first edge is done by pushing the payload
2228 pointer in the pbuf downwards. This is somewhat tricky since
2229 we do not want to discard the full contents of the pbuf up to
2230 the new starting point of the data since we have to keep the
2231 TCP header which is present in the first pbuf in the chain.
2232
2233 What is done is really quite a nasty hack: the first pbuf in
2234 the pbuf chain is pointed to by inseg.p. Since we need to be
2235 able to deallocate the whole pbuf, we cannot change this
2236 inseg.p pointer to point to any of the later pbufs in the
2237 chain. Instead, we point the ->payload pointer in the first
2238 pbuf to data in one of the later pbufs. We also set the
2239 inseg.data pointer to point to the right place. This way, the
2240 ->p pointer will still point to the first pbuf, but the
2241 ->p->payload pointer will point to data in another pbuf.
2242
2243 After we are done with adjusting the pbuf pointers we must
2244 adjust the ->data pointer in the seg and the segment
2245 length.*/
2246
2247 struct pbuf *p = inseg.p;
2248 u32_t off32 = pcb->rcv_nxt - seqno;
2249 u16_t new_tot_len, off;
2250 LWIP_ASSERT("inseg.p != NULL", inseg.p);
2251 LWIP_ASSERT("insane offset!", (off32 < 0xffff));
2252 off = (u16_t)off32;
2253 LWIP_ASSERT("pbuf too short!", (((s32_t)inseg.p->tot_len) >= off));
2254 inseg.len -= off;
2255 new_tot_len = (u16_t)(inseg.p->tot_len - off);
2256 while (p->len < off) {
2257 off -= p->len;
2258 /* all pbufs up to and including this one have len==0, so tot_len is equal */
2259 p->tot_len = new_tot_len;
2260 p->len = 0;
2261 p = p->next;
2262 }
2263 /* cannot fail... */
2264 pbuf_remove_header(p, off);
2265 inseg.tcphdr->seqno = seqno = pcb->rcv_nxt;
2266 } else {
2267 if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)) {
2268 /* the whole segment is < rcv_nxt */
2269 /* must be a duplicate of a packet that has already been correctly handled */
2270
2271 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: duplicate seqno %"U32_F"\n", seqno));
2272 tcp_ack_now(pcb);
2273 }
2274 }
2275
2276 /* The sequence number must be within the window (above rcv_nxt
2277 and below rcv_nxt + rcv_wnd) in order to be further
2278 processed. */
2279 if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt,
2280 pcb->rcv_nxt + pcb->rcv_wnd - 1)) {
2281 if (pcb->rcv_nxt == seqno) {
2282 /* The incoming segment is the next in sequence. We check if
2283 we have to trim the end of the segment and update rcv_nxt
2284 and pass the data to the application. */
2285 tcplen = TCP_TCPLEN(&inseg);
2286
2287 if (tcplen > pcb->rcv_wnd) {
2288 LWIP_DEBUGF(TCP_INPUT_DEBUG,
2289 ("tcp_receive: other end overran receive window"
2290 "seqno %"U32_F" len %"U16_F" right edge %"U32_F"\n",
2291 seqno, tcplen, pcb->rcv_nxt + pcb->rcv_wnd));
2292 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
2293 /* Must remove the FIN from the header as we're trimming
2294 * that byte of sequence-space from the packet */
2295 TCPH_FLAGS_SET(inseg.tcphdr, TCPH_FLAGS(inseg.tcphdr) & ~(unsigned int)TCP_FIN);
2296 }
2297 /* Adjust length of segment to fit in the window. */
2298 TCPWND_CHECK16(pcb->rcv_wnd);
2299 inseg.len = (u16_t)pcb->rcv_wnd;
2300 if (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) {
2301 inseg.len -= 1;
2302 }
2303 pbuf_realloc(inseg.p, inseg.len);
2304 tcplen = TCP_TCPLEN(&inseg);
2305 LWIP_ASSERT("tcp_receive: segment not trimmed correctly to rcv_wnd\n",
2306 (seqno + tcplen) == (pcb->rcv_nxt + pcb->rcv_wnd));
2307 }
2308 #if TCP_QUEUE_OOSEQ
2309 /* Received in-sequence data, adjust ooseq data if:
2310 - FIN has been received or
2311 - inseq overlaps with ooseq */
2312 if (pcb->ooseq != NULL) {
2313 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
2314 LWIP_DEBUGF(TCP_INPUT_DEBUG,
2315 ("tcp_receive: received in-order FIN, binning ooseq queue\n"));
2316 /* Received in-order FIN means anything that was received
2317 * out of order must now have been received in-order, so
2318 * bin the ooseq queue */
2319 while (pcb->ooseq != NULL) {
2320 struct tcp_seg *old_ooseq = pcb->ooseq;
2321 pcb->ooseq = pcb->ooseq->next;
2322 tcp_seg_free(old_ooseq);
2323 }
2324 } else {
2325 struct tcp_seg *next = pcb->ooseq;
2326 /* Remove all segments on ooseq that are covered by inseg already.
2327 * FIN is copied from ooseq to inseg if present. */
2328 while (next &&
2329 TCP_SEQ_GEQ(seqno + tcplen,
2330 next->tcphdr->seqno + next->len)) {
2331 struct tcp_seg *tmp;
2332 /* inseg cannot have FIN here (already processed above) */
2333 if ((TCPH_FLAGS(next->tcphdr) & TCP_FIN) != 0 &&
2334 (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) == 0) {
2335 TCPH_SET_FLAG(inseg.tcphdr, TCP_FIN);
2336 tcplen = TCP_TCPLEN(&inseg);
2337 }
2338 tmp = next;
2339 next = next->next;
2340 tcp_seg_free(tmp);
2341 }
2342 /* Now trim right side of inseg if it overlaps with the first
2343 * segment on ooseq */
2344 if (next &&
2345 TCP_SEQ_GT(seqno + tcplen,
2346 next->tcphdr->seqno)) {
2347 /* inseg cannot have FIN here (already processed above) */
2348 inseg.len = (u16_t)(next->tcphdr->seqno - seqno);
2349 if (TCPH_FLAGS(inseg.tcphdr) & TCP_SYN) {
2350 inseg.len -= 1;
2351 }
2352 pbuf_realloc(inseg.p, inseg.len);
2353 tcplen = TCP_TCPLEN(&inseg);
2354 LWIP_ASSERT("tcp_receive: segment not trimmed correctly to ooseq queue\n",
2355 (seqno + tcplen) == next->tcphdr->seqno);
2356 }
2357 pcb->ooseq = next;
2358 }
2359 }
2360 #endif /* TCP_QUEUE_OOSEQ */
2361
2362 pcb->rcv_nxt = seqno + tcplen;
2363
2364 /* Update the receiver's (our) window. */
2365 LWIP_ASSERT("tcp_receive: tcplen > rcv_wnd\n", pcb->rcv_wnd >= tcplen);
2366 pcb->rcv_wnd -= tcplen;
2367
2368 tcp_update_rcv_ann_wnd(pcb);
2369
2370 /* If there is data in the segment, we make preparations to
2371 pass this up to the application. The ->recv_data variable
2372 is used for holding the pbuf that goes to the
2373 application. The code for reassembling out-of-sequence data
2374 chains its data on this pbuf as well.
2375
2376 If the segment was a FIN, we set the TF_GOT_FIN flag that will
2377 be used to indicate to the application that the remote side has
2378 closed its end of the connection. */
2379 if (inseg.p->tot_len > 0) {
2380 recv_data = inseg.p;
2381 /* Since this pbuf now is the responsibility of the
2382 application, we delete our reference to it so that we won't
             (mistakenly) deallocate it. */
2384 inseg.p = NULL;
2385 }
2386 if (TCPH_FLAGS(inseg.tcphdr) & TCP_FIN) {
2387 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: received FIN.\n"));
2388 recv_flags |= TF_GOT_FIN;
2389 }
2390
2391 #if TCP_QUEUE_OOSEQ
2392 /* We now check if we have segments on the ->ooseq queue that
2393 are now in sequence. */
2394 while (pcb->ooseq != NULL &&
2395 pcb->ooseq->tcphdr->seqno == pcb->rcv_nxt) {
2396
2397 struct tcp_seg *cseg = pcb->ooseq;
2398 seqno = pcb->ooseq->tcphdr->seqno;
2399
2400 pcb->rcv_nxt += TCP_TCPLEN(cseg);
2401 LWIP_ASSERT("tcp_receive: ooseq tcplen > rcv_wnd\n",
2402 pcb->rcv_wnd >= TCP_TCPLEN(cseg));
2403 pcb->rcv_wnd -= TCP_TCPLEN(cseg);
2404
2405 tcp_update_rcv_ann_wnd(pcb);
2406
2407 if (cseg->p->tot_len > 0) {
2408 /* Chain this pbuf onto the pbuf that we will pass to
2409 the application. */
2410 /* With window scaling, this can overflow recv_data->tot_len, but
2411 that's not a problem since we explicitly fix that before passing
2412 recv_data to the application. */
2413 if (recv_data) {
2414 pbuf_cat(recv_data, cseg->p);
2415 } else {
2416 recv_data = cseg->p;
2417 }
2418 cseg->p = NULL;
2419 }
2420 if (TCPH_FLAGS(cseg->tcphdr) & TCP_FIN) {
2421 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_receive: dequeued FIN.\n"));
2422 recv_flags |= TF_GOT_FIN;
2423 if (pcb->state == ESTABLISHED) { /* force passive close or we can move to active close */
2424 pcb->state = CLOSE_WAIT;
2425 }
2426 }
2427
2428 pcb->ooseq = cseg->next;
2429 tcp_seg_free(cseg);
2430 }
2431 #endif /* TCP_QUEUE_OOSEQ */
2432
2433 #if LWIP_SACK
2434 tcp_update_sack_for_received_ooseq_segs(pcb);
2435 #else
2436 /* Acknowledge the segment(s). */
2437 tcp_ack(pcb);
2438 #endif
2439
2440 #if LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS
2441 if (ip_current_is_v6()) {
2442 /* Inform neighbor reachability of forward progress. */
2443 nd6_reachability_hint(ip6_current_src_addr());
2444 }
2445 #endif /* LWIP_IPV6 && LWIP_ND6_TCP_REACHABILITY_HINTS*/
2446
2447 } else {
2448 /* We get here if the incoming segment is out-of-sequence. */
2449
2450 #if LWIP_SACK && TCP_QUEUE_OOSEQ
2451 if (pcb->flags & TF_SACK) {
2452 update_ooseq_order_and_cnt(pcb, &inseg.order);
2453 }
2454 #endif
2455
2456 #if TCP_QUEUE_OOSEQ
2457 /* We queue the segment on the ->ooseq queue. */
2458 if (pcb->ooseq == NULL) {
2459 pcb->ooseq = tcp_seg_copy(&inseg);
2460 } else {
2461 /* If the queue is not empty, we walk through the queue and
2462 try to find a place where the sequence number of the
2463 incoming segment is between the sequence numbers of the
2464 previous and the next segment on the ->ooseq queue. That is
2465 the place where we put the incoming segment. If needed, we
2466 trim the second edges of the previous and the incoming
2467 segment so that it will fit into the sequence.
2468
2469 If the incoming segment has the same sequence number as a
2470 segment on the ->ooseq queue, we discard the segment that
2471 contains less data. */
2472
2473 struct tcp_seg *next, *prev = NULL;
2474 for (next = pcb->ooseq; next != NULL; next = next->next) {
2475 if (seqno == next->tcphdr->seqno) {
2476 /* The sequence number of the incoming segment is the
2477 same as the sequence number of the segment on
2478 ->ooseq. We check the lengths to see which one to
2479 discard. */
2480 if (inseg.len > next->len) {
2481 /* The incoming segment is larger than the old
2482 segment. We replace some segments with the new
2483 one. */
2484 struct tcp_seg *cseg = tcp_seg_copy(&inseg);
2485 if (cseg != NULL) {
2486 if (prev != NULL) {
2487 prev->next = cseg;
2488 } else {
2489 pcb->ooseq = cseg;
2490 }
2491 tcp_oos_insert_segment(cseg, next);
2492 }
2493 break;
2494 } else {
2495 /* Either the lengths are the same or the incoming
2496 segment was smaller than the old one; in either
2497 case, we ditch the incoming segment. */
2498 break;
2499 }
2500 } else {
2501 if (prev == NULL) {
2502 if (TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {
2503 /* The sequence number of the incoming segment is lower
2504 than the sequence number of the first segment on the
2505 queue. We put the incoming segment first on the
2506 queue. */
2507 struct tcp_seg *cseg = tcp_seg_copy(&inseg);
2508 if (cseg != NULL) {
2509 pcb->ooseq = cseg;
2510 tcp_oos_insert_segment(cseg, next);
2511 }
2512 break;
2513 }
2514 } else {
2515 /*if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
2516 TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {*/
2517 if (TCP_SEQ_BETWEEN(seqno, prev->tcphdr->seqno + 1, next->tcphdr->seqno - 1)) {
2518 /* The sequence number of the incoming segment is in
2519 between the sequence numbers of the previous and
                     the next segment on ->ooseq. We trim the previous
                     segment, delete any following segments that are fully
                     contained in the received segment, and trim the received
                     segment itself if needed. */
2523 struct tcp_seg *cseg = tcp_seg_copy(&inseg);
2524 if (cseg != NULL) {
2525 if (TCP_SEQ_GT(prev->tcphdr->seqno + prev->len, seqno)) {
2526 /* We need to trim the prev segment. */
2527 prev->len = (u16_t)(seqno - prev->tcphdr->seqno);
2528 pbuf_realloc(prev->p, prev->len);
2529 }
2530 prev->next = cseg;
2531 tcp_oos_insert_segment(cseg, next);
2532 }
2533 break;
2534 }
2535 }
2536
2537 /* We don't use 'prev' below, so let's set it to current 'next'.
2538 This way even if we break the loop below, 'prev' will be pointing
2539 at the segment right in front of the newly added one. */
2540 prev = next;
2541
2542 /* If the "next" segment is the last segment on the
2543 ooseq queue, we add the incoming segment to the end
2544 of the list. */
2545 if (next->next == NULL &&
2546 TCP_SEQ_GT(seqno, next->tcphdr->seqno)) {
2547 if (TCPH_FLAGS(next->tcphdr) & TCP_FIN) {
2548 /* segment "next" already contains all data */
2549 break;
2550 }
2551 next->next = tcp_seg_copy(&inseg);
2552 if (next->next != NULL) {
2553 if (TCP_SEQ_GT(next->tcphdr->seqno + next->len, seqno)) {
2554 /* We need to trim the last segment. */
2555 next->len = (u16_t)(seqno - next->tcphdr->seqno);
2556 pbuf_realloc(next->p, next->len);
2557 }
2558 /* check if the remote side overruns our receive window */
2559 if (TCP_SEQ_GT((u32_t)tcplen + seqno, pcb->rcv_nxt + (u32_t)pcb->rcv_wnd)) {
2560 LWIP_DEBUGF(TCP_INPUT_DEBUG,
2561 ("tcp_receive: other end overran receive window"
2562 "seqno %"U32_F" len %"U16_F" right edge %"U32_F"\n",
2563 seqno, tcplen, pcb->rcv_nxt + pcb->rcv_wnd));
2564 if (TCPH_FLAGS(next->next->tcphdr) & TCP_FIN) {
2565 /* Must remove the FIN from the header as we're trimming
2566 * that byte of sequence-space from the packet */
2567 TCPH_FLAGS_SET(next->next->tcphdr, TCPH_FLAGS(next->next->tcphdr) & ~TCP_FIN);
2568 }
2569 /* Adjust length of segment to fit in the window. */
2570 next->next->len = (u16_t)(pcb->rcv_nxt + pcb->rcv_wnd - seqno);
2571 pbuf_realloc(next->next->p, next->next->len);
2572 tcplen = TCP_TCPLEN(next->next);
2573 LWIP_ASSERT("tcp_receive: segment not trimmed correctly to rcv_wnd\n",
2574 (seqno + tcplen) == (pcb->rcv_nxt + pcb->rcv_wnd));
2575 }
2576 }
2577 break;
2578 }
2579 }
2580 }
2581 }
2582 #if defined(TCP_OOSEQ_BYTES_LIMIT) || defined(TCP_OOSEQ_PBUFS_LIMIT)
2583 {
2584 /* Check that the data on ooseq doesn't exceed one of the limits
2585 and throw away everything above that limit. */
2586 #ifdef TCP_OOSEQ_BYTES_LIMIT
2587 const u32_t ooseq_max_blen = TCP_OOSEQ_BYTES_LIMIT(pcb);
2588 u32_t ooseq_blen = 0;
2589 #endif
2590 #ifdef TCP_OOSEQ_PBUFS_LIMIT
2591 const u16_t ooseq_max_qlen = TCP_OOSEQ_PBUFS_LIMIT(pcb);
2592 u16_t ooseq_qlen = 0;
2593 #endif
2594 struct tcp_seg *next, *prev = NULL;
2595 for (next = pcb->ooseq; next != NULL; prev = next, next = next->next) {
2596 struct pbuf *p = next->p;
2597 int stop_here = 0;
2598 #ifdef TCP_OOSEQ_BYTES_LIMIT
2599 ooseq_blen += p->tot_len;
2600 if (ooseq_blen > ooseq_max_blen) {
2601 stop_here = 1;
2602 }
2603 #endif
2604 #ifdef TCP_OOSEQ_PBUFS_LIMIT
2605 ooseq_qlen += pbuf_clen(p);
2606 if (ooseq_qlen > ooseq_max_qlen) {
2607 stop_here = 1;
2608 }
2609 #endif
2610 if (stop_here) {
2611 /* too much ooseq data, dump this and everything after it */
2612 tcp_segs_free(next);
2613 if (prev == NULL) {
2614 /* first ooseq segment is too much, dump the whole queue */
2615 pcb->ooseq = NULL;
2616 } else {
2617 /* just dump 'next' and everything after it */
2618 prev->next = NULL;
2619 }
2620 break;
2621 }
2622 }
2623 }
2624 #endif /* TCP_OOSEQ_BYTES_LIMIT || TCP_OOSEQ_PBUFS_LIMIT */
2625 #endif /* TCP_QUEUE_OOSEQ */
2626
2627 #if LWIP_SACK
2628 tcp_update_sack_for_received_ooseq_segs(pcb);
2629 #else
2630 /* We send the ACK packet after we've (potentially) dealt with SACKs,
2631 so they can be included in the acknowledgment. */
2632 tcp_send_empty_ack(pcb);
2633 #endif
2634 }
2635 } else {
2636 /* The incoming segment is not within the window. */
2637 tcp_send_empty_ack(pcb);
2638 }
2639 } else {
2640 /* Segments with length 0 is taken care of here. Segments that
2641 fall out of the window are ACKed. */
2642 if (!TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd - 1)) {
2643 tcp_ack_now(pcb);
2644 }
2645 }
2646 }
2647
2648 /**
2649 * Parses the options contained in the incoming segment.
2650 *
2651 * Called from tcp_listen_input() and tcp_process().
2652 * Currently, only the MSS option is supported!
2653 *
2654 * @param pcb the tcp_pcb for which a segment arrived
2655 */
2656 static void
2657 tcp_parseopt(struct tcp_pcb *pcb)
2658 {
2659 u16_t c, max_c;
2660 u16_t mss;
2661 u8_t *opts = NULL, opt;
2662 #if LWIP_TCP_TIMESTAMPS
2663 u32_t tsval;
2664 #endif
2665
2666 #if LWIP_SACK
2667 u8_t sack_perm_rx = 0;
2668 #endif
2669
2670 LWIP_ASSERT("tcp_parseopt: invalid pcb", pcb != NULL);
2671
2672 /* Parse the TCP MSS option, if present. */
2673 if (TCPH_HDRLEN_BYTES(tcphdr) > TCP_HLEN) {
2674 opts = (u8_t *)tcphdr + TCP_HLEN;
2675
2676 /* tcp headerlen check for > 5 is done, so value wont be negative */
2677 max_c = (u16_t)(TCPH_HDRLEN_BYTES(tcphdr) - TCP_HLEN);
2678 for (c = 0; c < max_c;) {
2679 opt = opts[c];
2680 switch (opt) {
2681 case LWIP_TCP_OPT_EOL:
2682 /* End of options. */
2683 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: EOL\n"));
2684 goto RETURN;
2685 case LWIP_TCP_OPT_NOP:
2686 /* NOP option. */
2687 ++c;
2688 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: NOP\n"));
2689 break;
2690 case LWIP_TCP_OPT_MSS:
2691 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: MSS\n"));
2692 if (c + LWIP_TCP_OPT_LEN_MSS > max_c || opts[c + 1] != LWIP_TCP_OPT_LEN_MSS) {
2693 /* Bad length */
2694 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2695 goto RETURN;
2696 }
2697 /* An MSS option with the right option length. */
2698 mss = (u16_t)((opts[c + 2] << 8) | opts[c + 3]);
2699
2700 /* Check for mss only in SYN packets, else ignore it */
2701 if (flags & TCP_SYN) {
2702 /* Limit the mss to the 536 and prevent division by zero */
2703 pcb->rcv_mss = (u16_t)((mss == 0) ? TCP_DEFAULT_MSS : mss);
2704 }
2705
2706 /* Advance to next option */
2707 c = (u16_t)(c + LWIP_TCP_OPT_LEN_MSS);
2708 break;
2709
2710 #if LWIP_WND_SCALE
2711 case LWIP_TCP_OPT_WS:
2712 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: WND_SCALE\n"));
2713 if (c + LWIP_TCP_OPT_LEN_WS > max_c || opts[c + 1] != LWIP_TCP_OPT_LEN_WS) {
2714 /* Bad length */
2715 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2716 goto RETURN;
2717 }
2718 /* An WND_SCALE option with the right option length. */
2719 /* If syn was received with wnd scale option,
2720 activate wnd scale opt, but only if this is not a retransmission */
2721 if ((flags & TCP_SYN) && !(pcb->flags & TF_WND_SCALE)) {
2722 pcb->snd_scale = opts[c + 2];
2723 /* the largest scale is 14 */
2724 if (pcb->snd_scale > 14U) {
2725 pcb->snd_scale = 14U;
2726 }
2727 pcb->rcv_scale = TCP_RCV_SCALE;
2728 pcb->flags |= TF_WND_SCALE;
2729 /* window scaling is enabled, we can use the full receive window */
2730 LWIP_ASSERT("window not at default value", pcb->rcv_wnd == TCPWND_MIN16(TCP_WND));
2731 LWIP_ASSERT("window not at default value", pcb->rcv_ann_wnd == TCPWND_MIN16(TCP_WND));
2732 pcb->rcv_wnd = pcb->rcv_ann_wnd = TCP_WND;
2733 }
2734 /* Advance to next option */
2735 c = (u16_t)(c + LWIP_TCP_OPT_LEN_WS);
2736 break;
2737 #endif /* LWIP_WND_SCALE */
2738
2739 /* Adding for SACK */
2740 #if LWIP_SACK
2741 case LWIP_TCP_OPT_SACK_PERMITTED:
2742 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: TCP SACK permitted\n"));
2743 if (c + LWIP_TCP_OPT_LEN_SACK_PERMITTED > max_c || opts[c + 1] != LWIP_TCP_OPT_LEN_SACK_PERMITTED) {
2744 /* Bad length */
2745 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2746 goto RETURN;
2747 }
2748 /*
2749 * If syn was received with tcp sack option,
2750 * activate sack opt
2751 */
2752 if (flags & TCP_SYN) {
2753 tcp_parseopt_sack_permitted(pcb);
2754 sack_perm_rx = 1;
2755 }
2756
2757 c = (u16_t)(c + LWIP_TCP_OPT_LEN_SACK_PERMITTED);
2758 break;
2759 case LWIP_TCP_OPT_SACK:
2760 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: TCP SACK\n"));
2761
2762 /* Length check: (a) Non Zero (b)less than the max allowed options length (c) length should be 8*n+2 */
2763 if (((c + LWIP_TCP_OPT_SACK_MIN_LEN) > max_c) ||
2764 (!opts[c + 1]) || ((c + opts[c + 1]) > max_c) || ((opts[c + 1] - 2) % 8 != 0)) {
2765 /* Bad length */
2766 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2767 goto RETURN;
2768 }
2769
2770 if (pcb->flags & TF_SACK) {
2771 pcb->num_sacks = tcp_parseopt_sack(opts, c);
2772 }
2773 /* Advance to next option */
2774 c = (u16_t)(c + opts[c + 1]);
2775 break;
2776 #endif
2777
2778 #if LWIP_TCP_TIMESTAMPS
2779 case LWIP_TCP_OPT_TS:
2780 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: TS\n"));
2781 if ((c + LWIP_TCP_OPT_LEN_TS > max_c) || (opts[c + 1] != LWIP_TCP_OPT_LEN_TS)) {
2782 /* Bad length */
2783 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2784 goto RETURN;
2785 }
2786 /* TCP timestamp option with valid length */
2787 tsval = (opts[c + 2]) | (opts[c + 3] << 8) | (opts[c + 4] << 16) | (u32_t)(opts[c + 5] << 24);
2788 if (flags & TCP_SYN) {
2789 pcb->ts_recent = lwip_ntohl(tsval);
2790 /* Enable sending timestamps in every segment now that we know
2791 the remote host supports it. */
2792 pcb->flags |= TF_TIMESTAMP;
2793 } else if (TCP_SEQ_BETWEEN(pcb->ts_lastacksent, seqno, seqno + tcplen)) {
2794 pcb->ts_recent = lwip_ntohl(tsval);
2795 }
2796 /* Advance to next option */
2797 c = (u16_t)(c + LWIP_TCP_OPT_LEN_TS);
2798 break;
2799 #endif /* LWIP_TCP_TIMESTAMPS */
2800 default:
2801 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: other\n"));
2802 if ((max_c <= c + 1) || opts[c + 1] == 0) {
2803 LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_parseopt: bad length\n"));
2804 /* If the length field is zero, the options are malformed
2805 and we don't process them further. */
2806 goto RETURN;
2807 }
2808 /* All other options have a length field, so that we easily
2809 can skip past them. */
2810 c = (u16_t)(c + opts[c + 1]);
2811 break;
2812 }
2813 }
2814 }
2815
2816 RETURN:
2817
2818 #if LWIP_SACK
2819 if (flags & TCP_SYN) {
2820 /* If flags field is set with TF_SACK but, the peer does not support SACK,
2821 then disable SACK for the connection */
2822 if ((pcb->flags & TF_SACK) && (sack_perm_rx == 0)) {
2823 pcb->flags = (tcpflags_t)(pcb->flags & (~TF_SACK));
2824 }
2825 }
2826 #endif
2827 return;
2828 }
2829
/**
 * Request that the pcb currently being processed by the TCP input code
 * is closed when input processing is done.
 *
 * Sets the TF_CLOSED bit in the file-scope 'recv_flags' variable.
 * NOTE(review): recv_flags is presumably examined by tcp_input() after the
 * receive callbacks return — confirm against the tcp_input() implementation
 * earlier in this file.
 */
void
tcp_trigger_input_pcb_close(void)
{
  recv_flags |= TF_CLOSED;
}
2835
2836 #endif /* LWIP_TCP */
2837