• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * @file
3  * Transmission Control Protocol, outgoing traffic
4  *
5  * The output functions of TCP.
6  *
7  * There are two distinct ways for TCP segments to get sent:
8  * - queued data: these are segments transferring data or segments containing
9  *   SYN or FIN (which both count as one sequence number). They are created as
 10  *   struct @ref pbuf together with a struct tcp_seg and enqueued to the
11  *   unsent list of the pcb. They are sent by tcp_output:
12  *   - @ref tcp_write : creates data segments
13  *   - @ref tcp_split_unsent_seg : splits a data segment
14  *   - @ref tcp_enqueue_flags : creates SYN-only or FIN-only segments
15  *   - @ref tcp_output / tcp_output_segment : finalize the tcp header
16  *      (e.g. sequence numbers, options, checksum) and output to IP
17  *   - the various tcp_rexmit functions shuffle around segments between the
 18  *     unsent and unacked lists to retransmit them
19  *   - tcp_create_segment and tcp_pbuf_prealloc allocate pbuf and
20  *     segment for these functions
21  * - direct send: these segments don't contain data but control the connection
22  *   behaviour. They are created as pbuf only and sent directly without
23  *   enqueueing them:
24  *   - @ref tcp_send_empty_ack sends an ACK-only segment
25  *   - @ref tcp_rst sends a RST segment
26  *   - @ref tcp_keepalive sends a keepalive segment
27  *   - @ref tcp_zero_window_probe sends a window probe segment
28  *   - tcp_output_alloc_header allocates a header-only pbuf for these functions
29  */
30 
31 /*
32  * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without modification,
36  * are permitted provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright notice,
39  *    this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright notice,
41  *    this list of conditions and the following disclaimer in the documentation
42  *    and/or other materials provided with the distribution.
43  * 3. The name of the author may not be used to endorse or promote products
44  *    derived from this software without specific prior written permission.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
47  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
48  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
49  * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
50  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
51  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
54  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
55  * OF SUCH DAMAGE.
56  *
57  * This file is part of the lwIP TCP/IP stack.
58  *
59  * Author: Adam Dunkels <adam@sics.se>
60  *
61  */
62 
63 #include "lwip/opt.h"
64 
65 #if LWIP_TCP /* don't build if not configured for use in lwipopts.h */
66 
67 #include "lwip/priv/tcp_priv.h"
68 #include "lwip/def.h"
69 #include "lwip/mem.h"
70 #include "lwip/memp.h"
71 #include "lwip/ip_addr.h"
72 #include "lwip/netif.h"
73 #include "lwip/inet_chksum.h"
74 #include "lwip/stats.h"
75 #include "lwip/ip6.h"
76 #include "lwip/ip6_addr.h"
77 #include "lwip/sys.h"
78 #include "lwip/tcp_sack.h"
79 
80 #include <string.h>
81 
82 #ifdef LWIP_HOOK_FILENAME
83 #include LWIP_HOOK_FILENAME
84 #endif
85 
86 /* Allow to add custom TCP header options by defining this hook */
87 #ifdef LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH
88 #define LWIP_TCP_OPT_LENGTH_SEGMENT(flags, pcb) LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH(pcb, LWIP_TCP_OPT_LENGTH(flags))
89 #else
90 #define LWIP_TCP_OPT_LENGTH_SEGMENT(flags, pcb) LWIP_TCP_OPT_LENGTH(flags)
91 #endif
92 
93 /* Define some copy-macros for checksum-on-copy so that the code looks
94    nicer by preventing too many ifdef's. */
95 #if TCP_CHECKSUM_ON_COPY
96 #define TCP_DATA_COPY(dst, src, len, seg) do { \
97   tcp_seg_add_chksum(LWIP_CHKSUM_COPY(dst, src, len), \
98                      len, &seg->chksum, &seg->chksum_swapped); \
99   seg->flags |= TF_SEG_DATA_CHECKSUMMED; } while(0)
100 #define TCP_DATA_COPY2(dst, src, len, chksum, chksum_swapped)  \
101   tcp_seg_add_chksum(LWIP_CHKSUM_COPY(dst, src, len), len, chksum, chksum_swapped);
102 #else /* TCP_CHECKSUM_ON_COPY*/
103 #define TCP_DATA_COPY(dst, src, len, seg)                     MEMCPY(dst, src, len)
104 #define TCP_DATA_COPY2(dst, src, len, chksum, chksum_swapped) MEMCPY(dst, src, len)
105 #endif /* TCP_CHECKSUM_ON_COPY*/
106 
107 /** Define this to 1 for an extra check that the output checksum is valid
 108  * (useful when the checksum is generated by the application, not the stack) */
109 #ifndef TCP_CHECKSUM_ON_COPY_SANITY_CHECK
110 #define TCP_CHECKSUM_ON_COPY_SANITY_CHECK   0
111 #endif
112 /* Allow to override the failure of sanity check from warning to e.g. hard failure */
113 #if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
114 #ifndef TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL
115 #define TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL(msg) LWIP_DEBUGF(TCP_DEBUG | LWIP_DBG_LEVEL_WARNING, msg)
116 #endif
117 #endif
118 
119 #if TCP_OVERSIZE
120 /** The size of segment pbufs created when TCP_OVERSIZE is enabled */
121 #ifndef TCP_OVERSIZE_CALC_LENGTH
122 #define TCP_OVERSIZE_CALC_LENGTH(length) ((length) + TCP_OVERSIZE)
123 #endif
124 #endif
125 
126 /* tcp_route: common code that returns a fixed bound netif or calls ip_route */
127 static struct netif *
tcp_route(const struct tcp_pcb * pcb,const ip_addr_t * src,const ip_addr_t * dst)128 tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst)
129 {
130   LWIP_UNUSED_ARG(src); /* in case IPv4-only and source-based routing is disabled */
131 
132   if ((pcb != NULL) && (pcb->netif_idx != NETIF_NO_INDEX)) {
133     return netif_get_by_index(pcb->netif_idx);
134   } else {
135     if (pcb == NULL) {
136       return ip_route(src, dst);
137     } else {
138       return ip_route_pcb(dst, (struct ip_pcb*)pcb);
139     }
140   }
141 }
142 
/**
 * Create a TCP segment with prefilled header.
 *
 * Called by @ref tcp_write, @ref tcp_enqueue_flags and @ref tcp_split_unsent_seg
 *
 * @param pcb Protocol control block for the TCP connection.
 * @param p pbuf that is used to hold the TCP header.
 * @param hdrflags TCP flags for header.
 * @param seqno TCP sequence number of this packet
 * @param optflags options to include in TCP header
 * @return a new tcp_seg pointing to p, or NULL.
 * The TCP header is filled in except ackno and wnd.
 * p is freed on failure.
 */
static struct tcp_seg *
tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
{
  struct tcp_seg *seg;
  /* fixed option length implied by optflags (timestamp etc.) */
  u8_t optlen = (u8_t)(LWIP_TCP_OPT_LENGTH(optflags));

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  u8_t cnt = 0;

  /* Reserve extra option space for SACK blocks piggybacked onto this data
     segment; the block count depends on the space left by other options. */
  if (optflags & TF_SEG_OPTS_SACK_OPTIONS) {
    cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
    optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
  }
#endif

  /* On allocation failure the caller's pbuf is consumed (freed) here,
     as documented above. */
  if ((seg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG)) == NULL) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_create_segment: no memory.\n"));
    pbuf_free(p);
    return NULL;
  }
  seg->flags = optflags;
  seg->next = NULL;
  seg->p = p;
  LWIP_ASSERT("p->tot_len >= optlen", p->tot_len >= optlen);
  /* seg->len counts payload only; option bytes are excluded */
  seg->len = p->tot_len - optlen;

#if LWIP_SACK
  tcp_update_sack_fields_for_new_seg(seg);
#endif /* LWIP_SACK */

#if TCP_OVERSIZE_DBGCHECK
  seg->oversize_left = 0;
#endif /* TCP_OVERSIZE_DBGCHECK */
#if TCP_CHECKSUM_ON_COPY
  seg->chksum = 0;
  seg->chksum_swapped = 0;
  /* check optflags: data checksum flag is set later, when data is copied */
  LWIP_ASSERT("invalid optflags passed: TF_SEG_DATA_CHECKSUMMED",
              (optflags & TF_SEG_DATA_CHECKSUMMED) == 0);
#endif /* TCP_CHECKSUM_ON_COPY */

#if LWIP_SACK && DRIVER_STATUS_CHECK
  seg->seg_type = SEG_TYPE_NONE;
#endif

  /* build TCP header */
  if (pbuf_add_header(p, TCP_HLEN)) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_create_segment: no room for TCP header in pbuf.\n"));
    TCP_STATS_INC(tcp.err);
    /* seg owns p now, so tcp_seg_free releases both */
    tcp_seg_free(seg);
    return NULL;
  }
  seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
  seg->tcphdr->src = lwip_htons(pcb->local_port);
  seg->tcphdr->dest = lwip_htons(pcb->remote_port);
  seg->tcphdr->seqno = lwip_htonl(seqno);
  /* ackno is set in tcp_output */
  /* header length is in 32-bit words: 5 words of base header + options */
  TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (5 + optlen / 4), hdrflags);
  /* wnd and chksum are set in tcp_output */
  seg->tcphdr->urgp = 0;
  return seg;
}
219 
/**
 * Allocate a PBUF_RAM pbuf, possibly reserving extra room past 'length'.
 *
 * Behaves like pbuf_alloc(layer, length, PBUF_RAM) except that the pbuf
 * may carry additional usable bytes after the requested payload, reported
 * through 'oversize', so a later tcp_write can append in place.
 *
 * Called by @ref tcp_write
 *
 * @param layer flag to define header size.
 * @param length size of the pbuf's payload.
 * @param max_length maximum usable size of payload+oversize.
 * @param oversize pointer to a u16_t that will receive the number of usable tail bytes.
 * @param pcb The TCP connection that will enqueue the pbuf.
 * @param apiflags API flags given to tcp_write.
 * @param first_seg true when this pbuf will be used in the first enqueued segment.
 */
#if TCP_OVERSIZE
static struct pbuf *
tcp_pbuf_prealloc(pbuf_layer layer, u16_t length, u16_t max_length,
                  u16_t *oversize, const struct tcp_pcb *pcb, u8_t apiflags,
                  u8_t first_seg)
{
  struct pbuf *p;
  u16_t alloc_len = length;

  LWIP_ASSERT("tcp_pbuf_prealloc: invalid oversize", oversize != NULL);
  LWIP_ASSERT("tcp_pbuf_prealloc: invalid pcb", pcb != NULL);

#if LWIP_NETIF_TX_SINGLE_PBUF
  LWIP_UNUSED_ARG(max_length);
  LWIP_UNUSED_ARG(pcb);
  LWIP_UNUSED_ARG(apiflags);
  LWIP_UNUSED_ARG(first_seg);
  /* single-pbuf TX: always grab the full segment-sized buffer */
  alloc_len = max_length;
#else /* LWIP_NETIF_TX_SINGLE_PBUF */
  if (length < max_length) {
    /* Heuristic for choosing between a minimal and an oversized pbuf:
     * oversize pays off only when tcp_write is likely to be called again
     * before this segment goes out, i.e. when the caller announced more
     * data (TCP_WRITE_FLAG_MORE) or when Nagle will defer transmission. */
    u8_t more_follows = (apiflags & TCP_WRITE_FLAG_MORE) != 0;
    u8_t nagle_defers = (!(pcb->flags & TF_NODELAY) &&
                         (!first_seg ||
                          (pcb->unsent != NULL) ||
                          (pcb->unacked != NULL)));
    if (more_follows || nagle_defers) {
      alloc_len = LWIP_MIN(max_length, LWIP_MEM_ALIGN_SIZE(TCP_OVERSIZE_CALC_LENGTH(length)));
    }
  }
#endif /* LWIP_NETIF_TX_SINGLE_PBUF */
  p = pbuf_alloc(layer, alloc_len, PBUF_RAM);
  if (p == NULL) {
    return NULL;
  }
  LWIP_ASSERT("need unchained pbuf", p->next == NULL);
  /* report the spare tail bytes, then shrink len/tot_len to what is used now */
  *oversize = p->len - length;
  p->len = p->tot_len = length;
  return p;
}
#else /* TCP_OVERSIZE */
#define tcp_pbuf_prealloc(layer, length, mx, os, pcb, api, fst) pbuf_alloc((layer), (length), PBUF_RAM)
#endif /* TCP_OVERSIZE */
289 
#if TCP_CHECKSUM_ON_COPY
/** Fold the checksum of newly appended data into a segment's running checksum.
 *
 * Called by tcp_write and tcp_split_unsent_seg.
 *
 * @param chksum checksum of the data just added
 * @param len number of bytes that were added
 * @param seg_chksum running checksum to update (in/out)
 * @param seg_chksum_swapped tracks byte-alignment parity of the running sum (in/out)
 */
static void
tcp_seg_add_chksum(u16_t chksum, u16_t len, u16_t *seg_chksum,
                   u8_t *seg_chksum_swapped)
{
  /* sum in 32 bits, then fold carries back into 16 bits */
  u32_t acc = (u32_t)chksum + (u32_t)*seg_chksum;
  u16_t folded = FOLD_U32T(acc);
  /* an odd-length chunk flips the byte alignment of everything after it */
  if ((len & 1) != 0) {
    *seg_chksum_swapped = (u8_t)(1 - *seg_chksum_swapped);
    folded = SWAP_BYTES_IN_WORD(folded);
  }
  *seg_chksum = folded;
}
#endif /* TCP_CHECKSUM_ON_COPY */
310 
311 /** Checks if tcp_write is allowed or not (checks state, snd_buf and snd_queuelen).
312  *
313  * @param pcb the tcp pcb to check for
314  * @param len length of data to send (checked agains snd_buf)
315  * @return ERR_OK if tcp_write is allowed to proceed, another err_t otherwise
316  */
317 static err_t
tcp_write_checks(struct tcp_pcb * pcb,u16_t len)318 tcp_write_checks(struct tcp_pcb *pcb, u16_t len)
319 {
320   LWIP_ASSERT("tcp_write_checks: invalid pcb", pcb != NULL);
321 
322   /* connection is in invalid state for data transmission? */
323   if ((pcb->state != ESTABLISHED) &&
324       (pcb->state != CLOSE_WAIT) &&
325       (pcb->state != SYN_SENT) &&
326       (pcb->state != SYN_RCVD)) {
327     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_STATE | LWIP_DBG_LEVEL_SEVERE, ("tcp_write() called in invalid state\n"));
328     return (pcb->state > ESTABLISHED) ? ERR_PIPE : ERR_CONN;
329   } else if (len == 0) {
330     return ERR_OK;
331   }
332 
333   /* fail on too much data */
334   if (len > pcb->snd_buf) {
335     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE, ("tcp_write: too much data (len=%"U16_F" > snd_buf=%"U32_F")\n",
336                 len, pcb->snd_buf));
337     tcp_set_flags(pcb, TF_NAGLEMEMERR);
338     return ERR_MEM;
339   }
340 
341   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: queuelen: %"TCPWNDSIZE_F"\n", (tcpwnd_size_t)pcb->snd_queuelen));
342 
343   /* If total number of pbufs on the unsent/unacked queues exceeds the
344    * configured maximum, return an error */
345   /* check for configured max queuelen and possible overflow */
346   if ((pcb->snd_queuelen >= pcb->snd_queuelen_max) || (pcb->snd_queuelen > TCP_SNDQUEUELEN_OVERFLOW)) {
347     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE,
348                 ("tcp_write: too long queue %"TCPWNDSIZE_F" (max %"TCPWNDSIZE_F")\n",
349                  pcb->snd_queuelen, pcb->snd_queuelen_max));
350     TCP_STATS_INC(tcp.memerr);
351     tcp_set_flags(pcb, TF_NAGLEMEMERR);
352     return ERR_MEM;
353   }
354   if (pcb->snd_queuelen != 0) {
355     LWIP_ASSERT("tcp_write: pbufs on queue => at least one queue non-empty",
356                 pcb->unacked != NULL || pcb->unsent != NULL);
357   } else {
358     LWIP_ASSERT("tcp_write: no pbufs on queue => both queues empty",
359                 pcb->unacked == NULL && pcb->unsent == NULL);
360   }
361   return ERR_OK;
362 }
363 
364 /**
365  * @ingroup tcp_raw
366  * Write data for sending (but does not send it immediately).
367  *
368  * It waits in the expectation of more data being sent soon (as
369  * it can send them more efficiently by combining them together).
370  * To prompt the system to send data now, call tcp_output() after
371  * calling tcp_write().
372  *
373  * This function enqueues the data pointed to by the argument dataptr. The length of
374  * the data is passed as the len parameter. The apiflags can be one or more of:
375  * - TCP_WRITE_FLAG_COPY: indicates whether the new memory should be allocated
376  *   for the data to be copied into. If this flag is not given, no new memory
377  *   should be allocated and the data should only be referenced by pointer. This
378  *   also means that the memory behind dataptr must not change until the data is
379  *   ACKed by the remote host
380  * - TCP_WRITE_FLAG_MORE: indicates that more data follows. If this is omitted,
381  *   the PSH flag is set in the last segment created by this call to tcp_write.
382  *   If this flag is given, the PSH flag is not set.
383  *
384  * The tcp_write() function will fail and return ERR_MEM if the length
385  * of the data exceeds the current send buffer size or if the length of
386  * the queue of outgoing segment is larger than the upper limit defined
387  * in lwipopts.h. The number of bytes available in the output queue can
388  * be retrieved with the tcp_sndbuf() function.
389  *
390  * The proper way to use this function is to call the function with at
391  * most tcp_sndbuf() bytes of data. If the function returns ERR_MEM,
392  * the application should wait until some of the currently enqueued
393  * data has been successfully received by the other host and try again.
394  *
395  * @param pcb Protocol control block for the TCP connection to enqueue data for.
396  * @param arg Pointer to the data to be enqueued for sending.
397  * @param len Data length in bytes
398  * @param apiflags combination of following flags :
399  * - TCP_WRITE_FLAG_COPY (0x01) data will be copied into memory belonging to the stack
400  * - TCP_WRITE_FLAG_MORE (0x02) for TCP connection, PSH flag will not be set on last segment sent,
401  * @return ERR_OK if enqueued, another err_t on error
402  */
403 err_t
tcp_write(struct tcp_pcb * pcb,const void * arg,u16_t len,u8_t apiflags)404 tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags)
405 {
406   struct pbuf *concat_p = NULL;
407   struct tcp_seg *last_unsent = NULL, *seg = NULL, *prev_seg = NULL, *queue = NULL;
408   u16_t pos = 0; /* position in 'arg' data */
409   tcpwnd_size_t queuelen;
410   u8_t optlen;
411   u8_t optflags = 0;
412 #if TCP_OVERSIZE
413   u16_t oversize = 0;
414   u16_t oversize_used = 0;
415 #if TCP_OVERSIZE_DBGCHECK
416   u16_t oversize_add = 0;
417 #endif /* TCP_OVERSIZE_DBGCHECK*/
418 #endif /* TCP_OVERSIZE */
419   u16_t extendlen = 0;
420 #if TCP_CHECKSUM_ON_COPY
421   u16_t concat_chksum = 0;
422   u8_t concat_chksum_swapped = 0;
423   u16_t concat_chksummed = 0;
424 #endif /* TCP_CHECKSUM_ON_COPY */
425   err_t err;
426   u16_t mss_local;
427 #if LWIP_SACK_DATA_SEG_PIGGYBACK
428   u8_t cnt = 0;
429 #endif
430 
431   LWIP_ERROR("tcp_write: invalid pcb", pcb != NULL, return ERR_ARG);
432 
433   /* don't allocate segments bigger than half the maximum window we ever received */
434   mss_local = LWIP_MIN(pcb->mss, TCPWND_MIN16(pcb->snd_wnd_max / 2));
435   mss_local = mss_local ? mss_local : pcb->mss;
436 
437   LWIP_ASSERT_CORE_LOCKED();
438 
439 #if LWIP_NETIF_TX_SINGLE_PBUF
440   /* Always copy to try to create single pbufs for TX */
441   apiflags |= TCP_WRITE_FLAG_COPY;
442 #endif /* LWIP_NETIF_TX_SINGLE_PBUF */
443 
444   LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_write(pcb=%p, data=%p, len=%"U16_F", apiflags=%"U16_F")\n",
445                                  (void *)pcb, arg, len, (u16_t)apiflags));
446   LWIP_ERROR("tcp_write: arg == NULL && len > 0 (programmer violates API)",
447              (arg != NULL) || (len == 0), return ERR_ARG;);
448 
449   err = tcp_write_checks(pcb, len);
450   if (err != ERR_OK) {
451     return err;
452   }
453   queuelen = pcb->snd_queuelen;
454 
455 #if LWIP_TCP_TIMESTAMPS
456   if ((pcb->flags & TF_TIMESTAMP)) {
457     /* Make sure the timestamp option is only included in data segments if we
458        agreed about it with the remote host. */
459     optflags = TF_SEG_OPTS_TS;
460     optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(TF_SEG_OPTS_TS, pcb);
461     /* ensure that segments can hold at least one data byte... */
462     mss_local = LWIP_MAX(mss_local, LWIP_TCP_OPT_LEN_TS + 1);
463   } else
464 #endif /* LWIP_TCP_TIMESTAMPS */
465   {
466     optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
467   }
468 
469 #if LWIP_SACK_DATA_SEG_PIGGYBACK
470   if (pcb->flags & TF_SACK) {
471     cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
472     if (cnt) {
473       optflags |= TF_SEG_OPTS_SACK_OPTIONS;
474       optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
475     }
476   }
477 #endif
478 
479   /*
480    * TCP segmentation is done in three phases with increasing complexity:
481    *
482    * 1. Copy data directly into an oversized pbuf.
483    * 2. Chain a new pbuf to the end of pcb->unsent.
484    * 3. Create new segments.
485    *
486    * We may run out of memory at any point. In that case we must
487    * return ERR_MEM and not change anything in pcb. Therefore, all
488    * changes are recorded in local variables and committed at the end
489    * of the function. Some pcb fields are maintained in local copies:
490    *
491    * queuelen = pcb->snd_queuelen
492    * oversize = pcb->unsent_oversize
493    *
494    * These variables are set consistently by the phases:
495    *
496    * seg points to the last segment tampered with.
497    *
498    * pos records progress as data is segmented.
499    */
500 
501   /* Find the tail of the unsent queue. */
502   if (pcb->unsent != NULL) {
503     u16_t space;
504     u16_t unsent_optlen;
505 
506     /* @todo: this could be sped up by keeping last_unsent in the pcb */
507     for (last_unsent = pcb->unsent; last_unsent->next != NULL;
508          last_unsent = last_unsent->next);
509 
510     /* Usable space at the end of the last unsent segment */
511     unsent_optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(last_unsent->flags, pcb);
512 
513 #if LWIP_SACK_DATA_SEG_PIGGYBACK
514     if (last_unsent->flags & TF_SEG_OPTS_SACK_OPTIONS) {
515       cnt = tcp_get_sack_block_count_for_send(pcb, (u8_t)unsent_optlen);
516       /*
517         Here, it is not possible to predict previous sack blocks, so ensure that overflow does not happen.
518         send whatever SACK blocks which can be sent on wire with available space
519         This validation avoids space underflow...
520       */
521       if (mss_local >= (last_unsent->len + unsent_optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt))) {
522         unsent_optlen = (u8_t)(unsent_optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
523       }
524     }
525 #endif
526 
527     LWIP_ASSERT("mss_local is too small", mss_local >= last_unsent->len + unsent_optlen);
528     space = mss_local - (last_unsent->len + unsent_optlen);
529 
530     /*
531      * Phase 1: Copy data directly into an oversized pbuf.
532      *
533      * The number of bytes copied is recorded in the oversize_used
534      * variable. The actual copying is done at the bottom of the
535      * function.
536      */
537 #if TCP_OVERSIZE
538 #if TCP_OVERSIZE_DBGCHECK
539     /* check that pcb->unsent_oversize matches last_unsent->oversize_left */
540     LWIP_ASSERT("unsent_oversize mismatch (pcb vs. last_unsent)",
541                 pcb->unsent_oversize == last_unsent->oversize_left);
542 #endif /* TCP_OVERSIZE_DBGCHECK */
543     oversize = pcb->unsent_oversize;
544     if (oversize > 0) {
545       LWIP_ASSERT("inconsistent oversize vs. space", oversize <= space);
546       seg = last_unsent;
547       oversize_used = LWIP_MIN(space, LWIP_MIN(oversize, len));
548       pos += oversize_used;
549       oversize -= oversize_used;
550       space -= oversize_used;
551     }
552     /* now we are either finished or oversize is zero */
553     LWIP_ASSERT("inconsistent oversize vs. len", (oversize == 0) || (pos == len));
554 #endif /* TCP_OVERSIZE */
555 
556 #if !LWIP_NETIF_TX_SINGLE_PBUF
557     /*
558      * Phase 2: Chain a new pbuf to the end of pcb->unsent.
559      *
560      * As an exception when NOT copying the data, if the given data buffer
561      * directly follows the last unsent data buffer in memory, extend the last
562      * ROM pbuf reference to the buffer, thus saving a ROM pbuf allocation.
563      *
564      * We don't extend segments containing SYN/FIN flags or options
565      * (len==0). The new pbuf is kept in concat_p and pbuf_cat'ed at
566      * the end.
567      *
568      * This phase is skipped for LWIP_NETIF_TX_SINGLE_PBUF as we could only execute
569      * it after rexmit puts a segment from unacked to unsent and at this point,
570      * oversize info is lost.
571      */
572 #if LWIP_SACK
573     if ((last_unsent->len >= mss_local) && (pos < len) && (space > 0) && (last_unsent->len > 0))
574 #else
575     if ((pos < len) && (space > 0) && (last_unsent->len > 0))
576 #endif
577     {
578       u16_t seglen = LWIP_MIN(space, len - pos);
579       seg = last_unsent;
580 
581       /* Create a pbuf with a copy or reference to seglen bytes. We
582        * can use PBUF_RAW here since the data appears in the middle of
583        * a segment. A header will never be prepended. */
584       if (apiflags & TCP_WRITE_FLAG_COPY) {
585         /* Data is copied */
586         if ((concat_p = tcp_pbuf_prealloc(PBUF_RAW, seglen, space, &oversize, pcb, apiflags, 1)) == NULL) {
587           LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
588                       ("tcp_write : could not allocate memory for pbuf copy size %"U16_F"\n",
589                        seglen));
590           goto memerr;
591         }
592 #if TCP_OVERSIZE_DBGCHECK
593         oversize_add = oversize;
594 #endif /* TCP_OVERSIZE_DBGCHECK */
595         TCP_DATA_COPY2(concat_p->payload, (const u8_t *)arg + pos, seglen, &concat_chksum, &concat_chksum_swapped);
596 #if TCP_CHECKSUM_ON_COPY
597         concat_chksummed += seglen;
598 #endif /* TCP_CHECKSUM_ON_COPY */
599         queuelen += pbuf_clen(concat_p);
600       } else {
601         /* Data is not copied */
602         /* If the last unsent pbuf is of type PBUF_ROM, try to extend it. */
603         struct pbuf *p;
604         for (p = last_unsent->p; p->next != NULL; p = p->next);
605         if (((p->type_internal & (PBUF_TYPE_FLAG_STRUCT_DATA_CONTIGUOUS | PBUF_TYPE_FLAG_DATA_VOLATILE)) == 0) &&
606             (const u8_t *)p->payload + p->len == (const u8_t *)arg) {
607           LWIP_ASSERT("tcp_write: ROM pbufs cannot be oversized", pos == 0);
608           extendlen = seglen;
609         } else {
610           if ((concat_p = pbuf_alloc(PBUF_RAW, seglen, PBUF_ROM)) == NULL) {
611             LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
612                         ("tcp_write: could not allocate memory for zero-copy pbuf\n"));
613             goto memerr;
614           }
615           /* reference the non-volatile payload data */
616           ((struct pbuf_rom *)concat_p)->payload = (const u8_t *)arg + pos;
617           queuelen += pbuf_clen(concat_p);
618         }
619 #if TCP_CHECKSUM_ON_COPY
620         /* calculate the checksum of nocopy-data */
621         tcp_seg_add_chksum(~inet_chksum((const u8_t *)arg + pos, seglen), seglen,
622                            &concat_chksum, &concat_chksum_swapped);
623         concat_chksummed += seglen;
624 #endif /* TCP_CHECKSUM_ON_COPY */
625       }
626 
627       pos += seglen;
628     }
629 #endif /* !LWIP_NETIF_TX_SINGLE_PBUF */
630   } else {
631 #if TCP_OVERSIZE
632     LWIP_ASSERT("unsent_oversize mismatch (pcb->unsent is NULL)",
633                 pcb->unsent_oversize == 0);
634 #endif /* TCP_OVERSIZE */
635   }
636 
637   /*
638    * Phase 3: Create new segments.
639    *
640    * The new segments are chained together in the local 'queue'
641    * variable, ready to be appended to pcb->unsent.
642    */
643   while (pos < len) {
644     struct pbuf *p;
645     u16_t left = len - pos;
646     u16_t max_len = mss_local - optlen;
647     u16_t seglen = LWIP_MIN(left, max_len);
648 #if TCP_CHECKSUM_ON_COPY
649     u16_t chksum = 0;
650     u8_t chksum_swapped = 0;
651 #endif /* TCP_CHECKSUM_ON_COPY */
652 
653     if (apiflags & TCP_WRITE_FLAG_COPY) {
654       /* If copy is set, memory should be allocated and data copied
655        * into pbuf */
656       if ((p = tcp_pbuf_prealloc(PBUF_TRANSPORT, seglen + optlen, mss_local, &oversize, pcb, apiflags, queue == NULL)) == NULL) {
657         LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write : could not allocate memory for pbuf copy size %"U16_F"\n", seglen));
658         goto memerr;
659       }
660       LWIP_ASSERT("tcp_write: check that first pbuf can hold the complete seglen",
661                   (p->len >= seglen));
662       TCP_DATA_COPY2((char *)p->payload + optlen, (const u8_t *)arg + pos, seglen, &chksum, &chksum_swapped);
663     } else {
664       /* Copy is not set: First allocate a pbuf for holding the data.
665        * Since the referenced data is available at least until it is
666        * sent out on the link (as it has to be ACKed by the remote
667        * party) we can safely use PBUF_ROM instead of PBUF_REF here.
668        */
669       struct pbuf *p2;
670 #if TCP_OVERSIZE
671       LWIP_ASSERT("oversize == 0", oversize == 0);
672 #endif /* TCP_OVERSIZE */
673       if ((p2 = pbuf_alloc(PBUF_TRANSPORT, seglen, PBUF_ROM)) == NULL) {
674         LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: could not allocate memory for zero-copy pbuf\n"));
675         goto memerr;
676       }
677 #if TCP_CHECKSUM_ON_COPY
678       /* calculate the checksum of nocopy-data */
679       chksum = ~inet_chksum((const u8_t *)arg + pos, seglen);
680       if (seglen & 1) {
681         chksum_swapped = 1;
682         chksum = SWAP_BYTES_IN_WORD(chksum);
683       }
684 #endif /* TCP_CHECKSUM_ON_COPY */
685       /* reference the non-volatile payload data */
686       ((struct pbuf_rom *)p2)->payload = (const u8_t *)arg + pos;
687 
688       /* Second, allocate a pbuf for the headers. */
689       if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RAM)) == NULL) {
690         /* If allocation fails, we have to deallocate the data pbuf as
691          * well. */
692         pbuf_free(p2);
693         LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: could not allocate memory for header pbuf\n"));
694         goto memerr;
695       }
696       /* Concatenate the headers and data pbufs together. */
697       pbuf_cat(p/*header*/, p2/*data*/);
698     }
699 
700     queuelen += pbuf_clen(p);
701 
702     /* Now that there are more segments queued, we check again if the
703      * length of the queue exceeds the configured maximum or
704      * overflows. */
705     if ((queuelen > pcb->snd_queuelen_max) || (queuelen > TCP_SNDQUEUELEN_OVERFLOW)) {
706       LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
707                   ("tcp_write: queue too long %"TCPWNDSIZE_F" (%"TCPWNDSIZE_F")\n",
708                    queuelen, pcb->snd_queuelen_max));
709       pbuf_free(p);
710       goto memerr;
711     }
712 
713     if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) {
714       goto memerr;
715     }
716 #if TCP_OVERSIZE_DBGCHECK
717     seg->oversize_left = oversize;
718 #endif /* TCP_OVERSIZE_DBGCHECK */
719 #if TCP_CHECKSUM_ON_COPY
720     seg->chksum = chksum;
721     seg->chksum_swapped = chksum_swapped;
722     seg->flags |= TF_SEG_DATA_CHECKSUMMED;
723 #endif /* TCP_CHECKSUM_ON_COPY */
724 
725     /* first segment of to-be-queued data? */
726     if (queue == NULL) {
727       queue = seg;
728     } else {
729       /* Attach the segment to the end of the queued segments */
730       LWIP_ASSERT("prev_seg != NULL", prev_seg != NULL);
731       prev_seg->next = seg;
732     }
733     /* remember last segment of to-be-queued data for next iteration */
734     prev_seg = seg;
735 
736     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_TRACE, ("tcp_write: queueing %"U32_F":%"U32_F"\n",
737                 lwip_ntohl(seg->tcphdr->seqno),
738                 lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg)));
739 
740     pos += seglen;
741   }
742 
743   /*
744    * All three segmentation phases were successful. We can commit the
745    * transaction.
746    */
747 #if TCP_OVERSIZE_DBGCHECK
748   if ((last_unsent != NULL) && (oversize_add != 0)) {
749     last_unsent->oversize_left += oversize_add;
750   }
751 #endif /* TCP_OVERSIZE_DBGCHECK */
752 
753   /*
754    * Phase 1: If data has been added to the preallocated tail of
755    * last_unsent, we update the length fields of the pbuf chain.
756    */
757 #if TCP_OVERSIZE
758   if ((last_unsent != NULL) && (oversize_used > 0)) {
759     struct pbuf *p;
760     /* Bump tot_len of whole chain, len of tail */
761     for (p = last_unsent->p; p; p = p->next) {
762       p->tot_len += oversize_used;
763       if (p->next == NULL) {
764         TCP_DATA_COPY((char *)p->payload + p->len, arg, oversize_used, last_unsent);
765         p->len += oversize_used;
766       }
767     }
768     last_unsent->len += oversize_used;
769 #if TCP_OVERSIZE_DBGCHECK
770     LWIP_ASSERT("last_unsent->oversize_left >= oversize_used",
771                 last_unsent->oversize_left >= oversize_used);
772     last_unsent->oversize_left -= oversize_used;
773 #endif /* TCP_OVERSIZE_DBGCHECK */
774   }
775   pcb->unsent_oversize = oversize;
776 #endif /* TCP_OVERSIZE */
777 
778   /*
779    * Phase 2: concat_p can be concatenated onto last_unsent->p, unless we
780    * determined that the last ROM pbuf can be extended to include the new data.
781    */
782   if (last_unsent != NULL) {
783     if (concat_p != NULL) {
784       LWIP_ASSERT("tcp_write: cannot concatenate when pcb->unsent is empty",
785                   (last_unsent != NULL));
786       pbuf_cat(last_unsent->p, concat_p);
787       last_unsent->len += concat_p->tot_len;
788     } else if (extendlen > 0) {
789       struct pbuf *p;
790       LWIP_ASSERT("tcp_write: extension of reference requires reference",
791                   last_unsent != NULL && last_unsent->p != NULL);
792       for (p = last_unsent->p; p->next != NULL; p = p->next) {
793         p->tot_len += extendlen;
794       }
795       p->tot_len += extendlen;
796       p->len += extendlen;
797       last_unsent->len += extendlen;
798     }
799   }
800 #if TCP_CHECKSUM_ON_COPY
801   if (concat_chksummed) {
802     LWIP_ASSERT("tcp_write: concat checksum needs concatenated data",
803                 concat_p != NULL || extendlen > 0);
804     /*if concat checksumm swapped - swap it back */
805     if (concat_chksum_swapped) {
806       concat_chksum = SWAP_BYTES_IN_WORD(concat_chksum);
807     }
808     if (last_unsent != NULL) {
809       tcp_seg_add_chksum(concat_chksum, concat_chksummed, &last_unsent->chksum,
810                          &last_unsent->chksum_swapped);
811       last_unsent->flags |= TF_SEG_DATA_CHECKSUMMED;
812     }
813   }
814 #endif /* TCP_CHECKSUM_ON_COPY */
815 
816   /*
817    * Phase 3: Append queue to pcb->unsent. Queue may be NULL, but that
818    * is harmless
819    */
820   if (last_unsent == NULL) {
821     pcb->unsent = queue;
822   } else {
823     last_unsent->next = queue;
824   }
825 
826   /*
827    * Finally update the pcb state.
828    */
829   pcb->snd_lbb += len;
830   pcb->snd_buf -= len;
831   pcb->snd_queuelen = queuelen;
832 
833   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: %"S16_F" (after enqueued)\n",
834                                pcb->snd_queuelen));
835   if (pcb->snd_queuelen != 0) {
836     LWIP_ASSERT("tcp_write: valid queue length",
837                 pcb->unacked != NULL || pcb->unsent != NULL);
838   }
839 
840   /* Set the PSH flag in the last segment that we enqueued. */
841   if (seg != NULL && seg->tcphdr != NULL && ((apiflags & TCP_WRITE_FLAG_MORE) == 0)) {
842     TCPH_SET_FLAG(seg->tcphdr, TCP_PSH);
843   }
844 
845   return ERR_OK;
846 memerr:
847   tcp_set_flags(pcb, TF_NAGLEMEMERR);
848   TCP_STATS_INC(tcp.memerr);
849 
850   if (concat_p != NULL) {
851     pbuf_free(concat_p);
852   }
853   if (queue != NULL) {
854     tcp_segs_free(queue);
855   }
856   if (pcb->snd_queuelen != 0) {
857     LWIP_ASSERT("tcp_write: valid queue length", pcb->unacked != NULL ||
858                 pcb->unsent != NULL);
859   }
860   LWIP_DEBUGF(TCP_QLEN_DEBUG | LWIP_DBG_STATE, ("tcp_write: %"TCPWNDSIZE_F" (with mem err)\n", pcb->snd_queuelen));
861   return ERR_MEM;
862 }
863 
864 /**
865  * Split segment on the head of the unsent queue.  If return is not
866  * ERR_OK, existing head remains intact
867  *
868  * The split is accomplished by creating a new TCP segment and pbuf
869  * which holds the remainder payload after the split.  The original
870  * pbuf is trimmed to new length.  This allows splitting of read-only
871  * pbufs
872  *
873  * @param pcb the tcp_pcb for which to split the unsent head
874  * @param split the amount of payload to remain in the head
875  */
876 err_t
tcp_split_unsent_seg(struct tcp_pcb * pcb,u16_t split)877 tcp_split_unsent_seg(struct tcp_pcb *pcb, u16_t split)
878 {
879   struct tcp_seg *seg = NULL, *useg = NULL;
880   struct pbuf *p = NULL;
881   u8_t optlen;
882   u8_t optflags;
883   u8_t split_flags;
884   u8_t remainder_flags;
885   u16_t remainder;
886   u16_t offset;
887 #if TCP_CHECKSUM_ON_COPY
888   u16_t chksum = 0;
889   u8_t chksum_swapped = 0;
890   struct pbuf *q;
891 #endif /* TCP_CHECKSUM_ON_COPY */
892 
893   LWIP_ASSERT("tcp_split_unsent_seg: invalid pcb", pcb != NULL);
894 
895   useg = pcb->unsent;
896   if (useg == NULL) {
897     return ERR_MEM;
898   }
899 
900   if (split == 0) {
901     LWIP_ASSERT("Can't split segment into length 0", 0);
902     return ERR_VAL;
903   }
904 
905   if (useg->len <= split) {
906     return ERR_OK;
907   }
908 
909   LWIP_ASSERT("split <= mss", split <= pcb->mss);
910   LWIP_ASSERT("useg->len > 0", useg->len > 0);
911 
912   /* We should check that we don't exceed TCP_SND_QUEUELEN but we need
913    * to split this packet so we may actually exceed the max value by
914    * one!
915    */
916   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue: split_unsent_seg: %u\n", (unsigned int)pcb->snd_queuelen));
917 
918   optflags = useg->flags;
919 #if TCP_CHECKSUM_ON_COPY
920   /* Remove since checksum is not stored until after tcp_create_segment() */
921   optflags &= ~TF_SEG_DATA_CHECKSUMMED;
922 #endif /* TCP_CHECKSUM_ON_COPY */
923   optlen = LWIP_TCP_OPT_LENGTH(optflags);
924   remainder = useg->len - split;
925 
926   /* Create new pbuf for the remainder of the split */
927   p = pbuf_alloc(PBUF_TRANSPORT, remainder + optlen, PBUF_RAM);
928   if (p == NULL) {
929     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
930                 ("tcp_split_unsent_seg: could not allocate memory for pbuf remainder %u\n", remainder));
931     goto memerr;
932   }
933 
934   /* Offset into the original pbuf is past TCP/IP headers, options, and split amount */
935   offset = useg->p->tot_len - useg->len + split;
936   /* Copy remainder into new pbuf, headers and options will not be filled out */
937   if (pbuf_copy_partial(useg->p, (u8_t *)p->payload + optlen, remainder, offset ) != remainder) {
938     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
939                 ("tcp_split_unsent_seg: could not copy pbuf remainder %u\n", remainder));
940     goto memerr;
941   }
942 #if TCP_CHECKSUM_ON_COPY
943   /* calculate the checksum on remainder data */
944   tcp_seg_add_chksum(~inet_chksum((const u8_t *)p->payload + optlen, remainder), remainder,
945                      &chksum, &chksum_swapped);
946 #endif /* TCP_CHECKSUM_ON_COPY */
947 
948   /* Options are created when calling tcp_output() */
949 
950   /* Migrate flags from original segment */
951   split_flags = TCPH_FLAGS(useg->tcphdr);
952   remainder_flags = 0; /* ACK added in tcp_output() */
953 
954   if (split_flags & TCP_PSH) {
955     split_flags &= ~TCP_PSH;
956     remainder_flags |= TCP_PSH;
957   }
958   if (split_flags & TCP_FIN) {
959     split_flags &= ~TCP_FIN;
960     remainder_flags |= TCP_FIN;
961   }
962   /* SYN should be left on split, RST should not be present with data */
963 
964   seg = tcp_create_segment(pcb, p, remainder_flags, lwip_ntohl(useg->tcphdr->seqno) + split, optflags);
965   if (seg == NULL) {
966     p = NULL; /* Freed by tcp_create_segment */
967     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
968                 ("tcp_split_unsent_seg: could not create new TCP segment\n"));
969     goto memerr;
970   }
971 
972 #if TCP_CHECKSUM_ON_COPY
973   seg->chksum = chksum;
974   seg->chksum_swapped = chksum_swapped;
975   seg->flags |= TF_SEG_DATA_CHECKSUMMED;
976 #endif /* TCP_CHECKSUM_ON_COPY */
977 
978   /* Remove this segment from the queue since trimming it may free pbufs */
979   pcb->snd_queuelen -= pbuf_clen(useg->p);
980 
981   /* Trim the original pbuf into our split size.  At this point our remainder segment must be setup
982   successfully because we are modifying the original segment */
983   pbuf_realloc(useg->p, useg->p->tot_len - remainder);
984   useg->len -= remainder;
985   TCPH_SET_FLAG(useg->tcphdr, split_flags);
986 #if TCP_OVERSIZE_DBGCHECK
987   /* By trimming, realloc may have actually shrunk the pbuf, so clear oversize_left */
988   useg->oversize_left = 0;
989 #endif /* TCP_OVERSIZE_DBGCHECK */
990 
991   /* Add back to the queue with new trimmed pbuf */
992   pcb->snd_queuelen += pbuf_clen(useg->p);
993 
994 #if TCP_CHECKSUM_ON_COPY
995   /* The checksum on the split segment is now incorrect. We need to re-run it over the split */
996   useg->chksum = 0;
997   useg->chksum_swapped = 0;
998   q = useg->p;
999   offset = q->tot_len - useg->len; /* Offset due to exposed headers */
1000 
1001   /* Advance to the pbuf where the offset ends */
1002   while (q != NULL && offset > q->len) {
1003     offset -= q->len;
1004     q = q->next;
1005   }
1006   LWIP_ASSERT("Found start of payload pbuf", q != NULL);
1007   /* Checksum the first payload pbuf accounting for offset, then other pbufs are all payload */
1008   for (; q != NULL; offset = 0, q = q->next) {
1009     tcp_seg_add_chksum(~inet_chksum((const u8_t *)q->payload + offset, q->len - offset), q->len - offset,
1010                        &useg->chksum, &useg->chksum_swapped);
1011   }
1012 #endif /* TCP_CHECKSUM_ON_COPY */
1013 
1014   /* Update number of segments on the queues. Note that length now may
1015    * exceed TCP_SND_QUEUELEN! We don't have to touch pcb->snd_buf
1016    * because the total amount of data is constant when packet is split */
1017   pcb->snd_queuelen += pbuf_clen(seg->p);
1018 
1019   /* Finally insert remainder into queue after split (which stays head) */
1020   seg->next = useg->next;
1021   useg->next = seg;
1022 
1023 #if TCP_OVERSIZE
1024   /* If remainder is last segment on the unsent, ensure we clear the oversize amount
1025    * because the remainder is always sized to the exact remaining amount */
1026   if (seg->next == NULL) {
1027     pcb->unsent_oversize = 0;
1028   }
1029 #endif /* TCP_OVERSIZE */
1030 
1031   return ERR_OK;
1032 memerr:
1033   TCP_STATS_INC(tcp.memerr);
1034 
1035   LWIP_ASSERT("seg == NULL", seg == NULL);
1036   if (p != NULL) {
1037     pbuf_free(p);
1038   }
1039 
1040   return ERR_MEM;
1041 }
1042 
1043 /**
1044  * Called by tcp_close() to send a segment including FIN flag but not data.
1045  * This FIN may be added to an existing segment or a new, otherwise empty
1046  * segment is enqueued.
1047  *
1048  * @param pcb the tcp_pcb over which to send a segment
1049  * @return ERR_OK if sent, another err_t otherwise
1050  */
1051 err_t
tcp_send_fin(struct tcp_pcb * pcb)1052 tcp_send_fin(struct tcp_pcb *pcb)
1053 {
1054   LWIP_ASSERT("tcp_send_fin: invalid pcb", pcb != NULL);
1055 
1056   /* first, try to add the fin to the last unsent segment */
1057   if (pcb->unsent != NULL) {
1058     struct tcp_seg *last_unsent;
1059     for (last_unsent = pcb->unsent; last_unsent->next != NULL;
1060          last_unsent = last_unsent->next);
1061 
1062     if ((TCPH_FLAGS(last_unsent->tcphdr) & (TCP_SYN | TCP_FIN | TCP_RST)) == 0) {
1063       /* no SYN/FIN/RST flag in the header, we can add the FIN flag */
1064       TCPH_SET_FLAG(last_unsent->tcphdr, TCP_FIN);
1065       tcp_set_flags(pcb, TF_FIN);
1066       return ERR_OK;
1067     }
1068   }
1069   /* no data, no length, flags, copy=1, no optdata */
1070   return tcp_enqueue_flags(pcb, TCP_FIN);
1071 }
1072 
1073 /**
1074  * Enqueue SYN or FIN for transmission.
1075  *
1076  * Called by @ref tcp_connect, tcp_listen_input, and @ref tcp_close
1077  * (via @ref tcp_send_fin)
1078  *
1079  * @param pcb Protocol control block for the TCP connection.
1080  * @param flags TCP header flags to set in the outgoing segment.
1081  */
1082 err_t
tcp_enqueue_flags(struct tcp_pcb * pcb,u8_t flags)1083 tcp_enqueue_flags(struct tcp_pcb *pcb, u8_t flags)
1084 {
1085   struct pbuf *p;
1086   struct tcp_seg *seg;
1087   u8_t optflags = 0;
1088   u8_t optlen = 0;
1089 
1090   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue_flags: queuelen: %"U16_F"\n", (u16_t)pcb->snd_queuelen));
1091 
1092   LWIP_ASSERT("tcp_enqueue_flags: need either TCP_SYN or TCP_FIN in flags (programmer violates API)",
1093               (flags & (TCP_SYN | TCP_FIN)) != 0);
1094   LWIP_ASSERT("tcp_enqueue_flags: invalid pcb", pcb != NULL);
1095 
1096   /* check for configured max queuelen and possible overflow (FIN flag should always come through!) */
1097   if (((pcb->snd_queuelen >= pcb->snd_queuelen_max) || (pcb->snd_queuelen > TCP_SNDQUEUELEN_OVERFLOW)) &&
1098       ((flags & TCP_FIN) == 0)) {
1099     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE,
1100                 ("tcp_enqueue_flags: too long queue %"TCPWNDSIZE_F" (max %"TCPWNDSIZE_F")\n",
1101                  pcb->snd_queuelen, pcb->snd_queuelen_max));
1102     TCP_STATS_INC(tcp.memerr);
1103     pcb->flags |= TF_NAGLEMEMERR;
1104     return ERR_MEM;
1105   }
1106 
1107   /* No need to check pcb->snd_queuelen if only SYN or FIN are allowed! */
1108 
1109 #if LWIP_SACK_DATA_SEG_PIGGYBACK
1110   if (flags & TCP_FIN) {
1111     if (pcb->flags & TF_SACK) {
1112       optflags |= TF_SEG_OPTS_SACK_OPTIONS;
1113     }
1114   }
1115 #endif
1116 
1117   /* Get options for this segment. This is a special case since this is the
1118      only place where a SYN can be sent. */
1119   if (flags & TCP_SYN) {
1120     optflags = TF_SEG_OPTS_MSS;
1121 #if LWIP_WND_SCALE
1122     if ((pcb->state != SYN_RCVD) || (pcb->flags & TF_WND_SCALE)) {
1123       /* In a <SYN,ACK> (sent in state SYN_RCVD), the window scale option may only
1124          be sent if we received a window scale option from the remote host. */
1125       optflags |= TF_SEG_OPTS_WND_SCALE;
1126     }
1127 #endif /* LWIP_WND_SCALE */
1128 
1129 #if LWIP_SACK
1130     tcp_enqueue_flags_sack(pcb, &optflags);
1131 #endif
1132   }
1133 #if LWIP_TCP_TIMESTAMPS
1134   if ((pcb->flags & TF_TIMESTAMP) || ((flags & TCP_SYN) && (pcb->state != SYN_RCVD))) {
1135     /* Make sure the timestamp option is only included in data segments if we
1136        agreed about it with the remote host (and in active open SYN segments). */
1137     optflags |= TF_SEG_OPTS_TS;
1138   }
1139 #endif /* LWIP_TCP_TIMESTAMPS */
1140   optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
1141 
1142 #if LWIP_SACK_DATA_SEG_PIGGYBACK
1143   if (optflags & TF_SEG_OPTS_SACK_OPTIONS) {
1144       u8_t cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
1145       optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
1146   }
1147 #endif
1148 
1149   /* Allocate pbuf with room for TCP header + options */
1150   if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RAM)) == NULL) {
1151     tcp_set_flags(pcb, TF_NAGLEMEMERR);
1152     TCP_STATS_INC(tcp.memerr);
1153     return ERR_MEM;
1154   }
1155   LWIP_ASSERT("tcp_enqueue_flags: check that first pbuf can hold optlen",
1156               (p->len >= optlen));
1157 
1158   /* Allocate memory for tcp_seg, and fill in fields. */
1159   if ((seg = tcp_create_segment(pcb, p, flags, pcb->snd_lbb, optflags)) == NULL) {
1160     tcp_set_flags(pcb, TF_NAGLEMEMERR);
1161     TCP_STATS_INC(tcp.memerr);
1162     return ERR_MEM;
1163   }
1164   LWIP_ASSERT("seg->tcphdr not aligned", ((mem_ptr_t)seg->tcphdr % LWIP_MIN(MEM_ALIGNMENT, 4)) == 0);
1165   LWIP_ASSERT("tcp_enqueue_flags: invalid segment length", seg->len == 0);
1166 
1167   LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_TRACE,
1168               ("tcp_enqueue_flags: queueing %"U32_F":%"U32_F" (0x%"X16_F")\n",
1169                lwip_ntohl(seg->tcphdr->seqno),
1170                lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg),
1171                (u16_t)flags));
1172 
1173   /* Now append seg to pcb->unsent queue */
1174   if (pcb->unsent == NULL) {
1175     pcb->unsent = seg;
1176   } else {
1177     struct tcp_seg *useg;
1178     for (useg = pcb->unsent; useg->next != NULL; useg = useg->next);
1179     useg->next = seg;
1180   }
1181 #if TCP_OVERSIZE
1182   /* The new unsent tail has no space */
1183   pcb->unsent_oversize = 0;
1184 #endif /* TCP_OVERSIZE */
1185 
1186   /* SYN and FIN bump the sequence number */
1187   if ((flags & TCP_SYN) || (flags & TCP_FIN)) {
1188     pcb->snd_lbb++;
1189     /* optlen does not influence snd_buf */
1190   }
1191   if (flags & TCP_FIN) {
1192     tcp_set_flags(pcb, TF_FIN);
1193   }
1194 
1195   /* update number of segments on the queues */
1196   pcb->snd_queuelen += pbuf_clen(seg->p);
1197   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue_flags: %"S16_F" (after enqueued)\n", pcb->snd_queuelen));
1198   if (pcb->snd_queuelen != 0) {
1199     LWIP_ASSERT("tcp_enqueue_flags: invalid queue length",
1200                 pcb->unacked != NULL || pcb->unsent != NULL);
1201   }
1202 
1203   return ERR_OK;
1204 }
1205 
1206 #if LWIP_TCP_TIMESTAMPS
1207 /* Build a timestamp option (12 bytes long) at the specified options pointer)
1208  *
1209  * @param pcb tcp_pcb
1210  * @param opts option pointer where to store the timestamp option
1211  */
1212 static void
tcp_build_timestamp_option(const struct tcp_pcb * pcb,u32_t * opts)1213 tcp_build_timestamp_option(const struct tcp_pcb *pcb, u32_t *opts)
1214 {
1215   LWIP_ASSERT("tcp_build_timestamp_option: invalid pcb", pcb != NULL);
1216 
1217   /* Pad with two NOP options to make everything nicely aligned */
1218   opts[0] = PP_HTONL(0x0101080A);
1219   opts[1] = lwip_htonl(sys_now());
1220   opts[2] = lwip_htonl(pcb->ts_recent);
1221 }
1222 #endif
1223 
1224 #if LWIP_WND_SCALE
1225 /** Build a window scale option (3 bytes long) at the specified options pointer)
1226  *
1227  * @param opts option pointer where to store the window scale option
1228  */
1229 static void
tcp_build_wnd_scale_option(u32_t * opts)1230 tcp_build_wnd_scale_option(u32_t *opts)
1231 {
1232   LWIP_ASSERT("tcp_build_wnd_scale_option: invalid opts", opts != NULL);
1233 
1234   /* Pad with one NOP option to make everything nicely aligned */
1235   opts[0] = PP_HTONL(0x01030300 | TCP_RCV_SCALE);
1236 }
1237 #endif
1238 
1239 #if LWIP_TCP_TLP_SUPPORT
/*
 * This is the TLP probe timeout function as in draft-dukkipati-tcpm-tcp-loss-probe-01
 *
 * When PTO fires:
 *   (a) If a new previously unsent segment exists:
 *         -> Transmit new segment.
 *         -> FlightSize += SMSS. cwnd remains unchanged.
 *   (b) If no new segment exists:
 *         -> Retransmit the last segment.
 *   (c) Increment statistics counter for loss probes.
 *   (d) If conditions in (2) are satisfied:
 *         -> Reschedule next PTO.
 *       Else:
 *         -> Rearm RTO to fire at epoch 'now+RTO'.
 *   Note:- TLP was scheduled when the connection was in open state, i.e. the sender had so far
 *   received in-sequence ACKs with no SACK blocks. But by the time the PTO timer actually fires
 *   (at tcp_slowtmr or tcp_fasttmr), a SACK block may have been received from the peer in the
 *   meantime. lwIP nevertheless sends out the tail-loss-probe segment here, hoping that any
 *   additional dupack caused by the probe segment can trigger early retransmit and give a
 *   chance of fast recovery.
 * @param pcb Protocol control block for the TCP connection to send data
 * @return None
 *
 */
void tcp_pto_fire(struct tcp_pcb *pcb)
{
  struct tcp_seg *unacked = NULL;
  u32_t wnd;
  err_t err;
  u32_t cwnd;

  /* Only one probe episode may be outstanding: bail out if a TLP
   * retransmission is in flight (tlp_rtx_out != 0) and new data has been
   * sent since it (tlp_high_rxt != snd_nxt). */
  if (!((pcb->tlp_rtx_out == 0) || (pcb->tlp_high_rxt == pcb->snd_nxt))) {
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: tlp_rtx_out %u, tlp_high_rxt %u, snd_nxt %u\n",
                pcb->tlp_rtx_out, pcb->tlp_high_rxt, pcb->snd_nxt));
    return;
  }

  LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: timer fired\n"));

  /* Effective send window: limited by both the receiver window and cwnd */
  wnd = LWIP_MIN(pcb->snd_wnd, pcb->cwnd);

  /* Currently there is an unsent packet that was not sent due to cwnd; force one more send.
    If it is held back by snd_wnd instead, try to send unacked, as sending unsent may be
    declined by the peer due to insufficient buffer.
  */
#if defined(LWIP_DEBUG)
  if (pcb->unsent) {
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: unsent segment:%"U32_F" Available WND:%"TCPWNDSIZE_F""
                                "Required WND :%"TCPWNDSIZE_F"\n", lwip_htonl(pcb->unsent->tcphdr->seqno),
                                wnd, (lwip_ntohl(pcb->unsent->tcphdr->seqno) - pcb->lastack + pcb->unsent->len)));
  }
#endif

  /* Unsent data is allowed to be probed only once: the draft gives no guidance
    here, so probe with unsent only on the first PTO; after that first probe the
    segment has moved to unacked, so subsequent probes use unacked instead.
  */
  if ((pcb->unsent != NULL) &&
      (lwip_ntohl(pcb->unsent->tcphdr->seqno) - pcb->lastack + pcb->unsent->len > wnd) &&
      (wnd == pcb->cwnd) && (pcb->tlp_rtx_out == 0)) {
    cwnd = pcb->cwnd;
    /* Temporarily widen cwnd by one MSS so tcp_output() will emit the
     * otherwise cwnd-blocked head segment; restored right after the send. */
    TCP_WND_INC(pcb->cwnd, pcb->mss);
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: Probing with unsent segment %"U32_F"\n",
                                lwip_htonl(pcb->unsent->tcphdr->seqno)));

    /* HACK, increase cwnd+1 and after send reduce it by one again */
    err = tcp_output(pcb);
    pcb->tlp_pto_cnt++;
    pcb->cwnd  = cwnd; /* restore cwnd */
    if (err != ERR_OK) {
      return;
    }
  } else if (pcb->unacked != NULL) { /* if unacked is present */
    /* Walk to the tail: the probe retransmits the last (highest-seqno)
     * unacked segment, per the TLP draft. */
    for (unacked = pcb->unacked; unacked->next != NULL; unacked = unacked->next) ;
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: Probing with unacked segment %"U32_F"\n",
                                lwip_htonl(unacked->tcphdr->seqno)));
    /* NOTE(review): netif is passed as NULL here, whereas tcp_output() resolves
     * a route via tcp_route() before calling tcp_output_segment() -- confirm
     * this port's tcp_output_segment() tolerates a NULL netif. */
    err = tcp_output_segment(unacked, pcb, NULL);
    if (err != ERR_OK) {
      return;
    }
  } else {
    return;
  }

  /* tcp_output might have started scheduled pto timer, let it be... */
  if (pcb->tlp_rtx_out == 0) {
    /* First probe of this episode: remember how far we had sent */
    pcb->tlp_high_rxt = pcb->snd_nxt;
  }
  pcb->tlp_rtx_out++;

  LWIP_DEBUGF(TCP_TLP_DEBUG,
              ("tcp_pto_fire: send TLP, pcb %p, tlp_rtx_out %u, tlp_high_rxt %u, tlp_pto_cnt %u, rtime %d\n",
               pcb, pcb->tlp_rtx_out, pcb->tlp_high_rxt, pcb->tlp_pto_cnt, pcb->rtime));
  tcp_tlp_schedule_probe(pcb, wnd);

  return;
}
1336 #endif /* LWIP_TCP_TLP_SUPPORT */
1337 
1338 /**
1339  * @ingroup tcp_raw
1340  * Find out what we can send and send it
1341  *
1342  * @param pcb Protocol control block for the TCP connection to send data
1343  * @return ERR_OK if data has been sent or nothing to send
1344  *         another err_t on error
1345  */
1346 err_t
tcp_output(struct tcp_pcb * pcb)1347 tcp_output(struct tcp_pcb *pcb)
1348 {
1349   struct tcp_seg *seg, *useg;
1350   u32_t wnd, snd_nxt;
1351   u32_t wnd_ex;
1352   err_t err;
1353   struct netif *netif;
1354 #if TCP_CWND_DEBUG
1355   s16_t i = 0;
1356 #endif /* TCP_CWND_DEBUG */
1357 
1358   LWIP_ASSERT_CORE_LOCKED();
1359 
1360   LWIP_ASSERT("tcp_output: invalid pcb", pcb != NULL);
1361   /* pcb->state LISTEN not allowed here */
1362   LWIP_ASSERT("don't call tcp_output for listen-pcbs",
1363               pcb->state != LISTEN);
1364 
1365   /* First, check if we are invoked by the TCP input processing
1366      code. If so, we do not output anything. Instead, we rely on the
1367      input processing code to call us when input processing is done
1368      with. */
1369   if (tcp_input_pcb == pcb) {
1370     return ERR_OK;
1371   }
1372 
1373 #if DRIVER_STATUS_CHECK
1374   if (pcb->drv_status == DRV_NOT_READY) {
1375     LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: Cannot send data as netif driver is not ready\n"));
1376     /* Drop the packet here and return success */
1377     return ERR_OK;
1378   }
1379 #endif
1380 
1381   /*
1382   [RFC 5681 Section 3.2]
1383     The fast retransmit and fast recovery algorithms are implemented
1384       together as follows:
1385        1.  On the first and second duplicate ACKs received at a sender, a
1386            TCP SHOULD send a segment of previously unsent data per [RFC3042]
1387            provided that the receiver's advertised window allows, the total
1388            FlightSize would remain less than or equal to cwnd plus 2*SMSS,
1389            and that new data is available for transmission.  Further, the
1390            TCP sender MUST NOT change cwnd to reflect these two segments
1391            [RFC3042].
1392   */
1393   wnd_ex = pcb->cwnd;
1394   TCP_WND_INC(wnd_ex, ((pcb->dupacks > 0 && pcb->dupacks <= 2) ?
1395                        (tcpwnd_size_t)(pcb->dupacks * pcb->mss) : (tcpwnd_size_t)0));
1396   wnd = LWIP_MIN(pcb->snd_wnd, wnd_ex);
1397 
1398   seg = pcb->unsent;
1399 
1400   if (seg == NULL) {
1401     LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: nothing to send (%p)\n",
1402                                    (void *)pcb->unsent));
1403     LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F
1404                                  ", cwnd %"TCPWNDSIZE_F", wnd %"U32_F
1405                                  ", seg == NULL, ack %"U32_F"\n",
1406                                  pcb->snd_wnd, pcb->cwnd, wnd, pcb->lastack));
1407 
1408     /* If the TF_ACK_NOW flag is set and the ->unsent queue is empty, construct
1409      * an empty ACK segment and send it. */
1410     if (pcb->flags & TF_ACK_NOW) {
1411       return tcp_send_empty_ack(pcb);
1412     }
1413     /* nothing to send: shortcut out of here */
1414     goto output_done;
1415   } else {
1416     LWIP_DEBUGF(TCP_CWND_DEBUG,
1417                 ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F
1418                  ", effwnd %"U32_F", seq %"U32_F", ack %"U32_F"\n",
1419                  pcb->snd_wnd, pcb->cwnd, wnd,
1420                  lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len,
1421                  lwip_ntohl(seg->tcphdr->seqno), pcb->lastack));
1422   }
1423 
1424   netif = tcp_route(pcb, &pcb->local_ip, &pcb->remote_ip);
1425   if (netif == NULL) {
1426     return ERR_RTE;
1427   }
1428 
1429   /* If we don't have a local IP address, we get one from netif */
1430   if (ip_addr_isany(&pcb->local_ip)) {
1431     const ip_addr_t *local_ip = ip_netif_get_local_ip(netif, &pcb->remote_ip);
1432     if (local_ip == NULL) {
1433       return ERR_RTE;
1434     }
1435     ip_addr_copy(pcb->local_ip, *local_ip);
1436   }
1437 
1438   /* Handle the current segment not fitting within the window */
1439   if (lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len > wnd) {
1440   /* Check if we need to start the persistent timer when the next unsent segment
1441    * does not fit within the remaining send window and RTO timer is not running (we
1442    * have no in-flight data). A traditional approach would fill the remaining window
1443    * with part of the unsent segment (which will engage zero-window probing upon
1444    * reception of the zero window update from the receiver). This ensures the
1445    * subsequent window update is reliably received. With the goal of being lightweight,
1446    * we avoid splitting the unsent segment and treat the window as already zero.
1447    */
1448     if (wnd > 0 && wnd == pcb->snd_wnd && pcb->unacked == NULL && pcb->persist_backoff == 0) {
1449       pcb->persist_cnt = 0;
1450       pcb->persist_backoff = 1;
1451       pcb->persist_probe = 0;
1452     }
1453     /* We need an ACK, but can't send data now, so send an empty ACK */
1454     if (pcb->flags & TF_ACK_NOW) {
1455       return tcp_send_empty_ack(pcb);
1456     }
1457     goto output_done;
1458   }
1459   /* Stop persist timer, above conditions are not active */
1460   pcb->persist_backoff = 0;
1461 
1462   /* useg should point to last segment on unacked queue */
1463   useg = pcb->unacked;
1464   if (useg != NULL) {
1465     for (; useg->next != NULL; useg = useg->next);
1466   }
1467 
1468   /*
1469   [RFC 5681] 4.1.  Restarting Idle Connections
1470   [PENDING -Not yet implemented]
1471     A known problem with the TCP congestion control algorithms described
1472      above is that they allow a potentially inappropriate burst of traffic
1473      to be transmitted after TCP has been idle for a relatively long
1474      period of time.  After an idle period, TCP cannot use the ACK clock
1475      to strobe new segments into the network, as all the ACKs have drained
1476      from the network.  Therefore, as specified above, TCP can potentially
1477      send a cwnd-size line-rate burst into the network after an idle
1478      period.  In addition, changing network conditions may have rendered
1479      TCP's notion of the available end-to-end network capacity between two
1480      endpoints, as estimated by cwnd, inaccurate during the course of a
1481      long idle period.
1482 
1483      [Jac88] recommends that a TCP use slow start to restart transmission
1484      after a relatively long idle period.  Slow start serves to restart
1485      the ACK clock, just as it does at the beginning of a transfer.  This
1486      mechanism has been widely deployed in the following manner.  When TCP
1487      has not received a segment for more than one retransmission timeout,
1488      cwnd is reduced to the value of the restart window (RW) before
1489      transmission begins.
1490 
1491      For the purposes of this standard, we define RW = min(IW,cwnd).
1492 
1493      Using the last time a segment was received to determine whether or
1494      not to decrease cwnd can fail to deflate cwnd in the common case of
1495      persistent HTTP connections [HTH98].  In this case, a Web server
1496      receives a request before transmitting data to the Web client.  The
1497      reception of the request makes the test for an idle connection fail,
1498      and allows the TCP to begin transmission with a possibly
1499      inappropriately large cwnd.
1500 
1501      Therefore, a TCP SHOULD set cwnd to no more than RW before beginning
1502      transmission if the TCP has not sent data in an interval exceeding
1503      the retransmission timeout.
1504   */
1505 
1506   /* data available and window allows it to be sent? */
1507   while (seg != NULL &&
1508 #if DRIVER_STATUS_CHECK
1509          pcb->drv_status == DRV_READY &&
1510 #endif /* NETIF DRIVER STATUS */
1511          lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
1512     LWIP_ASSERT("RST not expected here!",
1513                 (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0);
1514     /* Stop sending if the nagle algorithm would prevent it
1515      * Don't stop:
1516      * - if tcp_write had a memory error before (prevent delayed ACK timeout) or
1517      * - if FIN was already enqueued for this PCB (SYN is always alone in a segment -
1518      *   either seg->next != NULL or pcb->unacked == NULL;
1519      *   RST is no sent using tcp_write/tcp_output.
1520      */
1521     if ((tcp_do_output_nagle(pcb) == 0) &&
1522         ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) {
1523       break;
1524     }
1525 #if TCP_CWND_DEBUG
1526     LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F", effwnd %"U32_F", seq %"U32_F", ack %"U32_F", i %"S16_F"\n",
1527                                  pcb->snd_wnd, pcb->cwnd, wnd,
1528                                  lwip_ntohl(seg->tcphdr->seqno) + seg->len -
1529                                  pcb->lastack,
1530                                  lwip_ntohl(seg->tcphdr->seqno), pcb->lastack, i));
1531     ++i;
1532 #endif /* TCP_CWND_DEBUG */
1533 
1534     if (pcb->state != SYN_SENT) {
1535       TCPH_SET_FLAG(seg->tcphdr, TCP_ACK);
1536     }
1537 
1538 #if TCP_OVERSIZE_DBGCHECK
1539     seg->oversize_left = 0;
1540 #endif /* TCP_OVERSIZE_DBGCHECK */
1541     err = tcp_output_segment(seg, pcb, netif);
1542     if (err != ERR_OK) {
1543       /* segment could not be sent, for whatever reason */
1544       tcp_set_flags(pcb, TF_NAGLEMEMERR);
1545       return err;
1546     }
1547     pcb->unsent = seg->next;
1548     if (pcb->state != SYN_SENT) {
1549       tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
1550     }
1551     snd_nxt = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
1552     if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) {
1553       pcb->snd_nxt = snd_nxt;
1554       if (TCP_TCPLEN(seg) < pcb->mss) {
1555         pcb->snd_sml = snd_nxt;
1556       }
1557 #if LWIP_SACK
1558       if (pcb->flags & TF_SACK) {
1559         pcb->high_data = pcb->snd_nxt - 1;
1560       }
1561 #endif
1562     }
1563     /* put segment on unacknowledged list if length > 0 */
1564     if (TCP_TCPLEN(seg) > 0) {
1565       seg->next = NULL;
1566       /* unacked list is empty? */
1567       if (pcb->unacked == NULL) {
1568         pcb->unacked = seg;
1569         useg = seg;
1570         /* unacked list is not empty? */
1571       } else {
1572         /* In the case of fast retransmit, the packet should not go to the tail
1573          * of the unacked queue, but rather somewhere before it. We need to check for
1574          * this case. -STJ Jul 27, 2004 */
1575         if (useg != NULL) {
1576           if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) {
1577             /* add segment to before tail of unacked list, keeping the list sorted */
1578             struct tcp_seg **cur_seg = &(pcb->unacked);
1579             while (*cur_seg &&
1580               TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
1581                 cur_seg = &((*cur_seg)->next);
1582             }
1583             seg->next = (*cur_seg);
1584             (*cur_seg) = seg;
1585           } else {
1586             /* add segment to tail of unacked list */
1587             useg->next = seg;
1588             useg = useg->next;
1589           }
1590         }
1591       }
1592       /* do not queue empty segments on the unacked list */
1593     } else {
1594       tcp_seg_free(seg);
1595     }
1596     seg = pcb->unsent;
1597 
1598 #if LWIP_TCP_TLP_SUPPORT
1599     /* stop In-progressing PTO if new data transmit */
1600     if (pcb->tlp_time_stamp) {
1601       LWIP_TCP_TLP_CLEAR_VARS(pcb);
1602       if ((pcb->unacked != NULL) && (pcb->rtime == -1)) {
1603         pcb->rtime = 0;
1604       }
1605     }
1606 
1607     /* draft-dukkipati-tcpm-tcp-loss-probe-01: Schedule PTO after transmission of new data in Open state */
1608     /* lwip violates this draft as PTO still be scheduled in Disorder state */
1609     tcp_tlp_schedule_probe(pcb, wnd);
1610 #endif /* LWIP_TCP_TLP_SUPPORT */
1611   }
1612 output_done:
1613 #if TCP_OVERSIZE
1614   if (pcb->unsent == NULL) {
1615     /* last unsent has been removed, reset unsent_oversize */
1616     pcb->unsent_oversize = 0;
1617   }
1618 #endif /* TCP_OVERSIZE */
1619 
1620   tcp_clear_flags(pcb, TF_NAGLEMEMERR);
1621   return ERR_OK;
1622 }
1623 
1624 #if LWIP_SACK_DATA_SEG_PIGGYBACK
1625 
1626 /*
1627  * Called by tcp_output_segment.
1628  *
1629  * @param seg the tcp_seg to send
1630  * @param pcb the tcp_pcb for the TCP connection used to send the segment
1631  */
1632 u8_t
tcp_check_and_alloc_sack_options(struct tcp_seg * seg,const struct tcp_pcb * pcb)1633 tcp_check_and_alloc_sack_options(struct tcp_seg *seg, const struct tcp_pcb *pcb)
1634 {
1635   struct pbuf *p = NULL;
1636   struct pbuf *p2 = NULL;
1637   u8_t optlen;
1638   u8_t cnt, orig_cnt;
1639   u8_t flags = 0;
1640   struct tcp_hdr *new_tcphdr = NULL;
1641   u16_t offset;
1642   u16_t mss_local = (u16_t)LWIP_MIN(pcb->mss, pcb->snd_wnd_max >> 1);
1643 
1644   u16_t alloc_len;
1645   u8_t sack_optlen, orig_sack_optlen;
1646 
1647   /* Zero can be given as right operand based on flags. */
1648   optlen = (u8_t)(LWIP_TCP_OPT_LENGTH(seg->flags));
1649 
1650   /* Header length - 20 - Other options length excluding SACK Opts */
1651   orig_sack_optlen = (u8_t)((TCPH_HDRLEN_BYTES(seg->tcphdr) - TCP_HLEN) - (LWIP_TCP_OPT_LENGTH(seg->flags)));
1652   /* 4 - First byte(Padding + Kind + Length) 8 - Number of bytes for each SACK Block */
1653   orig_cnt = (u8_t)((orig_sack_optlen >= 4) ? (orig_sack_optlen - 4) / 8 : 0);
1654 
1655   cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
1656   sack_optlen = (u8_t)(LWIP_TCP_SACK_OPT_LENGTH(cnt));
1657   if ((seg->len + optlen + sack_optlen) > mss_local) {
1658     sack_optlen = orig_sack_optlen;
1659   }
1660 
1661   if (sack_optlen == orig_sack_optlen) {
1662     LWIP_DEBUGF(TCP_SACK_DEBUG, ("SACK count same. Rebuilding seg not required\n"));
1663     return orig_cnt;
1664   }
1665 
1666   /* 4 - First byte(Padding + Kind + Length) 8 - Number of bytes for each SACK Block */
1667   cnt = (u8_t)((sack_optlen >= 4) ? (sack_optlen - 4) / 8 : 0);
1668   LWIP_DEBUGF(TCP_SACK_DEBUG, ("Calculated SACK Count: %d SACK Optlen: %d\n", cnt, sack_optlen));
1669 
1670   optlen =  (u8_t)(optlen + sack_optlen);
1671 
1672   if (seg->tcphdr < (struct tcp_hdr *)seg->p->payload) {
1673     /* Sanity check before finding offset below */
1674     return orig_cnt;
1675   }
1676 
1677   offset = (u16_t)((u8_t *)seg->tcphdr - (u8_t *)seg->p->payload);
1678   alloc_len = (u16_t)(optlen + (seg->p->len - (offset + TCPH_HDRLEN_BYTES(seg->tcphdr))));
1679 
1680   p = pbuf_alloc(PBUF_TRANSPORT, alloc_len, PBUF_RAM);
1681   if (p == NULL) {
1682     return orig_cnt;
1683   }
1684 
1685   if (!pbuf_header(p, TCP_HLEN)) {
1686     new_tcphdr = (struct tcp_hdr *)p->payload;
1687     (void)memcpy_s(new_tcphdr, TCP_HLEN, seg->tcphdr, TCP_HLEN);
1688 
1689     alloc_len = (u16_t)(alloc_len - optlen); /* options will be filled below */
1690 
1691     (void)memcpy_s((char *)new_tcphdr + TCP_HLEN + optlen, alloc_len,
1692                    (char *)seg->tcphdr + TCPH_HDRLEN_BYTES(seg->tcphdr), alloc_len);
1693 
1694     flags = TCPH_FLAGS(new_tcphdr);
1695     TCPH_HDRLEN_FLAGS_SET(new_tcphdr, (5 + optlen / 4), flags);
1696     p2 = seg->p->next;
1697     while (p2 != NULL) {
1698       pbuf_ref(p2);
1699       pbuf_cat(p, p2);
1700       p2 = p2->next;
1701     }
1702     (void)pbuf_free(seg->p);
1703     seg->p = p;
1704     seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
1705   } else {
1706     LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_output_segment: pbuf_alloc failed \n"));
1707     (void)pbuf_free(p);
1708     return orig_cnt;
1709   }
1710   return cnt;
1711 }
1712 #endif
1713 
1714 /** Check if a segment's pbufs are used by someone else than TCP.
1715  * This can happen on retransmission if the pbuf of this segment is still
1716  * referenced by the netif driver due to deferred transmission.
1717  * This is the case (only!) if someone down the TX call path called
1718  * pbuf_ref() on one of the pbufs!
1719  *
1720  * @arg seg the tcp segment to check
1721  * @return 1 if ref != 1, 0 if ref == 1
1722  */
1723 static int
tcp_output_segment_busy(const struct tcp_seg * seg)1724 tcp_output_segment_busy(const struct tcp_seg *seg)
1725 {
1726   LWIP_ASSERT("tcp_output_segment_busy: invalid seg", seg != NULL);
1727 
1728   /* We only need to check the first pbuf here:
1729      If a pbuf is queued for transmission, a driver calls pbuf_ref(),
1730      which only changes the ref count of the first pbuf */
1731   if (atomic_read(&seg->p->ref) != 1) {
1732     /* other reference found */
1733     return 1;
1734   }
1735   /* no other references found */
1736   return 0;
1737 }
1738 
/**
 * Called by tcp_output() to actually send a TCP segment over IP.
 *
 * Finalizes the TCP header (ackno, window, options, checksum), starts the
 * retransmission timer and RTT sampling if applicable, and hands the
 * segment to the IP layer.
 *
 * @param seg the tcp_seg to send
 * @param pcb the tcp_pcb for the TCP connection used to send the segment
 * @param netif the netif used to send the segment (routed here if NULL)
 * @return ERR_OK if sent (or deliberately skipped because the segment is
 *         still held by the driver), ERR_NETUNREACH/ERR_RTE or the
 *         ip_output_if() result otherwise
 */
err_t
tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif)
{
  err_t err;
  u16_t len;
  u32_t *opts;
#if TCP_CHECKSUM_ON_COPY
  int seg_chksum_was_swapped = 0;
#endif
#if LWIP_SACK_DATA_SEG_PIGGYBACK
  u8_t cnt = 0; /* number of SACK blocks piggybacked on this segment */
#endif

  LWIP_ASSERT("tcp_output_segment: invalid seg", seg != NULL);
  LWIP_ASSERT("tcp_output_segment: invalid pcb", pcb != NULL);

  if (netif == NULL) {
    netif = ip_route_pcb(&pcb->remote_ip, (struct ip_pcb*)pcb);
    if (netif == NULL) {
      /* Don't even try to send a SYN packet if we have no route since that will fail. */
      return ERR_NETUNREACH;
    }
  }

#if DRIVER_STATUS_CHECK
  /* Driver interface is not yet ready: record that and bail out. */
  if (!(netif->flags & NETIF_FLAG_DRIVER_RDY)) {
    /* Update driver status if not ready */
    pcb->drv_status = DRV_NOT_READY;
    return ERR_RTE;
  }
#endif

  if (tcp_output_segment_busy(seg)) {
    /* This should not happen: rexmit functions should have checked this.
       However, since this function modifies p->len, we must not continue in this case. */
    LWIP_DEBUGF(TCP_RTO_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_output_segment: segment busy\n"));
    return ERR_OK;
  }

  /* The TCP header has already been constructed, but the ackno and
   wnd fields remain. */
  seg->tcphdr->ackno = lwip_htonl(pcb->rcv_nxt);

  /* advertise our receive window size in this TCP segment */
#if LWIP_WND_SCALE
  if (seg->flags & TF_SEG_OPTS_WND_SCALE) {
    /* The Window field in a SYN segment itself (the only type where we send
       the window scale option) is never scaled. */
    seg->tcphdr->wnd = lwip_htons(TCPWND_MIN16(pcb->rcv_ann_wnd));
  } else
#endif /* LWIP_WND_SCALE */
  {
    seg->tcphdr->wnd = lwip_htons(TCPWND_MIN16(RCV_WND_SCALE(pcb, pcb->rcv_ann_wnd)));
  }

  /* remember the right edge we just announced */
  pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd;

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  /* Rebuild the segment header first if the SACK block count changed since
     the segment was created (may reallocate seg->p / seg->tcphdr). */
  if (seg->flags & TF_SEG_OPTS_SACK_OPTIONS) {
    cnt = tcp_check_and_alloc_sack_options(seg, pcb);
    LWIP_DEBUGF(TCP_SACK_DEBUG, ("Sending SACK Count: %d \n", cnt));
  }
#endif

  /* Add any requested options.  NB MSS option is only set on SYN
     packets, so ignore it here */
  /* cast through void* to get rid of alignment warnings */
  opts = (u32_t *)(void *)(seg->tcphdr + 1);
  if (seg->flags & TF_SEG_OPTS_MSS) {
#if LWIP_TCP_MAXSEG
    /* a user-configured MSS (pcb->usr_mss) takes precedence over TCP_MSS */
    u16_t mss = ((pcb->usr_mss == 0) ? (TCP_MSS) : (pcb->usr_mss));
#else
    u16_t mss = TCP_MSS;
#endif /* LWIP_TCP_MAXSEG */
#if TCP_CALCULATE_EFF_SEND_MSS
    mss = tcp_eff_send_mss_netif(mss, netif, &pcb->remote_ip);
#endif /* TCP_CALCULATE_EFF_SEND_MSS */
    *opts = TCP_BUILD_MSS_OPTION(mss);
    opts += 1;
  }
#if LWIP_TCP_TIMESTAMPS
  pcb->ts_lastacksent = pcb->rcv_nxt;

  if (seg->flags & TF_SEG_OPTS_TS) {
    tcp_build_timestamp_option(pcb, opts);
    opts += 3; /* the timestamp option occupies 3 32-bit words */
  }
#endif
#if LWIP_WND_SCALE
  if (seg->flags & TF_SEG_OPTS_WND_SCALE) {
    tcp_build_wnd_scale_option(opts);
    opts += 1;
  }
#endif
#if LWIP_SACK
  if (seg->flags & TF_SEG_OPTS_SACK_PERMITTED) {
    /* Fix for sending SACK options along with data */
    tcp_build_sack_permitted_option(opts);
    opts += 1;
  }

  /* Fix for sending SACK options along with data */
#if LWIP_SACK_DATA_SEG_PIGGYBACK
  if (seg->flags & TF_SEG_OPTS_SACK_OPTIONS) {
    tcp_build_sack_option(pcb, cnt, opts);
    /* cnt may be 0 here (SACK not active), in which case the option length
       and hence the advance is 0 */
    opts += (LWIP_TCP_SACK_OPT_LENGTH(cnt)) / sizeof(u32_t);
  }
#endif

#if LWIP_SACK_PERF_OPT
  /* update packet sequence number (per-transmission counter, fork-specific) */
  if (pcb->flags & TF_SACK) {
    seg->pkt_trans_seq_cntr = pcb->pkt_seq_num;
    pcb->pkt_seq_num++;
  }
#endif
#endif /* LWIP_SACK */
  /* Set retransmission timer running if it is not currently enabled
     This must be set before checking the route. */
  if (pcb->rtime < 0) {
    pcb->rtime = 0;
  }

  /* don't make RTT sample if packets are being retransmitted per Karn's Algorithm */
  if ((pcb->rttest == 0) && seg->len && TCP_SEQ_LEQ(pcb->snd_nxt, lwip_ntohl(seg->tcphdr->seqno))) {
    pcb->rttest = sys_now();
    if (pcb->rttest == 0) {
      /* "rttest==0" means RTT sample not started, so avoid storing 0 */
      pcb->rttest = 1;
    }
    pcb->rtseq = lwip_ntohl(seg->tcphdr->seqno);
    LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_output_segment: rtseq %"U32_F", rttest %"U32_F"\n", pcb->rtseq, pcb->rttest));
  }
  LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output_segment: %"U32_F":%"U32_F"\n",
                                 lwip_htonl(seg->tcphdr->seqno), lwip_htonl(seg->tcphdr->seqno) +
                                 seg->len));

  /* number of payload-pbuf bytes in front of the TCP header */
  len = (u16_t)((u8_t *)seg->tcphdr - (u8_t *)seg->p->payload);
  if (len == 0) {
    /** Exclude retransmitted segments from this count. */
    MIB2_STATS_INC(mib2.tcpoutsegs);
  }

  /* hide any bytes before the TCP header so the pbuf starts at seg->tcphdr */
  seg->p->len -= len;
  seg->p->tot_len -= len;
  /* record the payload length of this (most recently sent) segment */
  pcb->last_payload_len = seg->len;
  seg->p->payload = seg->tcphdr;

  seg->tcphdr->chksum = 0;

#ifdef LWIP_HOOK_TCP_OUT_ADD_TCPOPTS
  opts = LWIP_HOOK_TCP_OUT_ADD_TCPOPTS(seg->p, seg->tcphdr, pcb, opts);
#endif
  LWIP_ASSERT("options not filled", (u8_t *)opts == ((u8_t *)(seg->tcphdr + 1)) + LWIP_TCP_OPT_LENGTH_SEGMENT(seg->flags, pcb));

#if CHECKSUM_GEN_TCP
  IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
#if TCP_CHECKSUM_ON_COPY
    u32_t acc;
#if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
    u16_t chksum_slow = ip_chksum_pseudo(seg->p, IP_PROTO_TCP,
                                         seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
#endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */
    if ((seg->flags & TF_SEG_DATA_CHECKSUMMED) == 0) {
      LWIP_ASSERT("data included but not checksummed",
                  seg->p->tot_len == TCPH_HDRLEN_BYTES(seg->tcphdr));
    }

    /* rebuild TCP header checksum (TCP header changes for retransmissions!) */
    acc = ip_chksum_pseudo_partial(seg->p, IP_PROTO_TCP,
                                   seg->p->tot_len, TCPH_HDRLEN_BYTES(seg->tcphdr), &pcb->local_ip, &pcb->remote_ip);
    /* add payload checksum */
    if (seg->chksum_swapped) {
      seg_chksum_was_swapped = 1;
      seg->chksum = SWAP_BYTES_IN_WORD(seg->chksum);
      seg->chksum_swapped = 0;
    }
    acc = (u16_t)~acc + seg->chksum;
    seg->tcphdr->chksum = (u16_t)~FOLD_U32T(acc);
#if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
    if (chksum_slow != seg->tcphdr->chksum) {
      TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL(
        ("tcp_output_segment: calculated checksum is %"X16_F" instead of %"X16_F"\n",
         seg->tcphdr->chksum, chksum_slow));
      seg->tcphdr->chksum = chksum_slow;
    }
#endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */
#else /* TCP_CHECKSUM_ON_COPY */
    seg->tcphdr->chksum = ip_chksum_pseudo(seg->p, IP_PROTO_TCP,
                                           seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
#endif /* TCP_CHECKSUM_ON_COPY */
  }
#endif /* CHECKSUM_GEN_TCP */
  TCP_STATS_INC(tcp.xmit);

#if LWIP_SO_DONTROUTE
  if (ip_get_option((struct ip_pcb *)pcb, SOF_DONTROUTE)) {
    seg->p->flags |= PBUF_FLAG_IS_LINK_ONLY;
  }
#endif /* LWIP_SO_DONTROUTE */

  NETIF_SET_HINTS(netif, &(pcb->netif_hints));

#if LWIP_SO_PRIORITY
  seg->p->priority = pcb->priority;
#endif /* LWIP_SO_PRIORITY */

  err = ip_output_if(seg->p, &pcb->local_ip, &pcb->remote_ip, pcb->ttl,
                     pcb->tos, IP_PROTO_TCP, netif);
  NETIF_RESET_HINTS(netif);

#if TCP_CHECKSUM_ON_COPY
  if (seg_chksum_was_swapped) {
    /* if data is added to this segment later, chksum needs to be swapped,
       so restore this now */
    seg->chksum = SWAP_BYTES_IN_WORD(seg->chksum);
    seg->chksum_swapped = 1;
  }
#endif

  return err;
}
1972 
1973 /**
1974  * Requeue all unacked segments for retransmission
1975  *
1976  * Called by tcp_slowtmr() for slow retransmission.
1977  *
1978  * @param pcb the tcp_pcb for which to re-enqueue all unacked segments
1979  */
1980 err_t
tcp_rexmit_rto_prepare(struct tcp_pcb * pcb)1981 tcp_rexmit_rto_prepare(struct tcp_pcb *pcb)
1982 {
1983   struct tcp_seg *seg;
1984 
1985   LWIP_ASSERT("tcp_rexmit_rto_prepare: invalid pcb", pcb != NULL);
1986 
1987   if (pcb->unacked == NULL) {
1988     return ERR_VAL;
1989   }
1990 
1991   /* Move all unacked segments to the head of the unsent queue.
1992      However, give up if any of the unsent pbufs are still referenced by the
1993      netif driver due to deferred transmission. No point loading the link further
1994      if it is struggling to flush its buffered writes. */
1995   for (seg = pcb->unacked; seg->next != NULL; seg = seg->next) {
1996     if (tcp_output_segment_busy(seg)) {
1997       LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n"));
1998       return ERR_VAL;
1999     }
2000 
2001 #if LWIP_SACK
2002 #if DRIVER_STATUS_CHECK
2003     /* Clear the loss recovery seg_type */
2004     seg->seg_type = SEG_TYPE_NONE;
2005 #endif /* DRIVER_STATUS_CHECK */
2006     if (seg->state & TF_SEG_SACKED) {
2007       seg->state =  (u32_t)(seg->state & (~TF_SEG_SACKED));
2008     }
2009 #endif /* LWIP_SACK */
2010   }
2011   if (tcp_output_segment_busy(seg)) {
2012     LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n"));
2013     return ERR_VAL;
2014   }
2015 
2016 #if LWIP_SACK
2017 #if DRIVER_STATUS_CHECK
2018   /* Clear the loss recovery seg_type */
2019   seg->seg_type = SEG_TYPE_NONE;
2020 #endif /* DRIVER_STATUS_CHECK */
2021   if (seg->state & TF_SEG_SACKED) {
2022     seg->state =  (u32_t)(seg->state & (~TF_SEG_SACKED));
2023   }
2024 #endif /* LWIP_SACK */
2025   /* concatenate unsent queue after unacked queue */
2026   seg->next = pcb->unsent;
2027 #if TCP_OVERSIZE_DBGCHECK
2028   /* if last unsent changed, we need to update unsent_oversize */
2029   if (pcb->unsent == NULL) {
2030     pcb->unsent_oversize = seg->oversize_left;
2031   }
2032 #endif /* TCP_OVERSIZE_DBGCHECK */
2033   /* unsent queue is the concatenated queue (of unacked, unsent) */
2034   pcb->unsent = pcb->unacked;
2035   /* unacked queue is now empty */
2036   pcb->unacked = NULL;
2037 
2038   /* Mark RTO in-progress */
2039   tcp_set_flags(pcb, TF_RTO);
2040   /* Record the next byte following retransmit */
2041   pcb->rto_end = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
2042   /* Don't take any RTT measurements after retransmitting. */
2043   pcb->rttest = 0;
2044 
2045 #if LWIP_SACK
2046   /* As per section 5.1 in RFC 6675 */
2047   /* If RTO happens, then need to stop pipe based loss recovery */
2048   /* And need to capture high data to recovery point */
2049   /* New recovery phase should not be initiated untill high ack */
2050   /* is greater than or equal to recovery point. */
2051   if (pcb->flags & TF_SACK) {
2052     pcb->recovery_point = pcb->high_data;
2053     pcb->flags =  (tcpflags_t)(pcb->flags & (~TF_IN_SACK_FRLR));
2054     pcb->flags |= TF_IN_SACK_RTO;
2055     LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_rexmit_rto: In retransmit timeout\n"));
2056     LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Pipe is %"U32_F
2057                                  " RecoveryPoint is %"U32_F" HighSacked is %"U32_F" HighData is %"U32_F
2058                                  " HighRxt is %"U32_F" RescueRxt is %"U32_F"\n", pcb->pipe, pcb->recovery_point,
2059                                  pcb->high_sacked, pcb->high_data, pcb->high_rxt, pcb->rescue_rxt));
2060 #if LWIP_SACK_PERF_OPT
2061     LWIP_DEBUGF(TCP_SACK_DEBUG, ("Freeing the Fast retransmitted segments\n"));
2062     tcp_fr_segs_free(pcb->fr_segs);
2063     pcb->fr_segs = NULL;
2064     pcb->last_frseg = NULL;
2065 #if LWIP_SACK_CWND_OPT
2066     pcb->recover_cwnd = 0;
2067     pcb->recover_ssthresh = 0;
2068 #endif /* LWIP_SACK_CWND_OPT */
2069 #endif /* LWIP_SACK_PERF_OPT */
2070     pcb->sacked = 0;
2071 #if LWIP_FACK_THRESHOLD_BASED_FR
2072     /* just to make sure that the FACK reflect the forward-most data held by the receiver AT ANY point of time */
2073     pcb->fack = pcb->lastack;
2074 #endif /* LWIP_FACK_THRESHOLD_BASED_FR */
2075   }
2076 #endif /* LWIP_SACK */
2077   return ERR_OK;
2078 }
2079 
/**
 * Account for a retransmission and send the segments previously moved to
 * the unsent queue by tcp_rexmit_rto_prepare().
 *
 * Called by tcp_slowtmr() (and tcp_rexmit_rto()) after "prepare" succeeded.
 *
 * @param pcb the tcp_pcb whose requeued segments should be (re)transmitted
 */
void
tcp_rexmit_rto_commit(struct tcp_pcb *pcb)
{
  LWIP_ASSERT("tcp_rexmit_rto_commit: invalid pcb", pcb != NULL);

  /* increment number of retransmissions (saturating at 0xFF) */
  if (pcb->nrtx < 0xFF) {
    ++pcb->nrtx;
  }
  /* Do the actual retransmission */
  tcp_output(pcb);
}
2099 
/**
 * Requeue all unacked segments for retransmission (prepare + commit in one
 * step).
 *
 * Called by tcp_process() only; tcp_slowtmr() needs to do some things between
 * "prepare" and "commit" and therefore calls the two halves separately.
 *
 * @param pcb the tcp_pcb for which to re-enqueue all unacked segments
 */
void
tcp_rexmit_rto(struct tcp_pcb *pcb)
{
  LWIP_ASSERT("tcp_rexmit_rto: invalid pcb", pcb != NULL);

  /* only send if the unacked queue could actually be requeued */
  if (tcp_rexmit_rto_prepare(pcb) == ERR_OK) {
    tcp_rexmit_rto_commit(pcb);
  }
}
2117 
/**
 * Requeue the first unacked segment for retransmission
 *
 * Called by tcp_receive() for fast retransmit.
 *
 * @param pcb the tcp_pcb for which to retransmit the first unacked segment
 * @return ERR_OK if the segment was moved to the unsent queue,
 *         ERR_VAL if there is nothing to retransmit or the segment is still
 *         referenced by the netif driver
 */
err_t
tcp_rexmit(struct tcp_pcb *pcb)
{
  struct tcp_seg *seg;
  struct tcp_seg **cur_seg;

  LWIP_ASSERT("tcp_rexmit: invalid pcb", pcb != NULL);

  if (pcb->unacked == NULL) {
    return ERR_VAL;
  }

  seg = pcb->unacked;

  /* Give up if the segment is still referenced by the netif driver
     due to deferred transmission. */
  if (tcp_output_segment_busy(seg)) {
    LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit busy\n"));
    return ERR_VAL;
  }

  /* Move the first unacked segment to the unsent queue */
  /* Keep the unsent queue sorted. */
  pcb->unacked = seg->next;

  /* walk the unsent queue to the first segment with a higher seqno,
     inserting before it keeps the queue ordered by sequence number */
  cur_seg = &(pcb->unsent);
  while (*cur_seg &&
         TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
    cur_seg = &((*cur_seg)->next );
  }
  seg->next = *cur_seg;
  *cur_seg = seg;
#if TCP_OVERSIZE
  if (seg->next == NULL) {
    /* the retransmitted segment is last in unsent, so reset unsent_oversize */
    pcb->unsent_oversize = 0;
  }
#endif /* TCP_OVERSIZE */

  /* Don't take any rtt measurements after retransmitting. */
  pcb->rttest = 0;

  /* Do the actual retransmission. */
  MIB2_STATS_INC(mib2.tcpretranssegs);
  /* No need to call tcp_output: we are always called from tcp_input()
     and thus tcp_output directly returns. */
  return ERR_OK;
}
2173 
2174 
/**
 * Handle retransmission after three dupacks received
 *
 * Enters fast retransmit/fast recovery (RFC 5681 / RFC 6582 NewReno) if not
 * already in it: retransmits the first unacked segment, halves ssthresh and
 * inflates cwnd by 3 segments.
 *
 * @param pcb the tcp_pcb for which to retransmit the first unacked segment
 */
void
tcp_rexmit_fast(struct tcp_pcb *pcb)
{
  /*
      [RFC 6582] Section 3.2.  Specification
       2)  Three duplicate ACKs:
       When the third duplicate ACK is received, the TCP sender first
       checks the value of recover to see if the Cumulative
       Acknowledgment field covers more than recover.  If so, the value
       of recover is incremented to the value of the highest sequence
       number transmitted by the TCP so far.  The TCP then enters fast
       retransmit (step 2 of Section 3.2 of [RFC5681]).  If not, the TCP
       does not enter fast retransmit and does not reset ssthresh.

      Step 2 above specifies a check that the Cumulative Acknowledgment
         field covers more than recover.  Because the acknowledgment field
         contains the sequence number that the sender next expects to receive,
         the acknowledgment "ack_number" covers more than recover when

            ack_number - 1 > recover;

         i.e., at least one byte more of data is acknowledged beyond the
         highest byte that was outstanding when fast retransmit was last
         entered.
  */
  if (pcb->unacked != NULL && !(pcb->flags & TF_INFR) &&
      TCP_SEQ_GEQ(pcb->lastack, pcb->fast_recovery_point)) {
    /* This is fast retransmit. Retransmit the first unacked segment. */
    LWIP_DEBUGF(TCP_FR_DEBUG,
                ("tcp_receive: dupacks %"U16_F" (%"U32_F
                 "), fast retransmit %"U32_F"\n",
                 (u16_t)pcb->dupacks, pcb->lastack,
                 lwip_ntohl(pcb->unacked->tcphdr->seqno)));
    if (tcp_rexmit(pcb) == ERR_OK) {
      /*
        [RFC5681] Section  3.2.  Fast Retransmit/Fast Recovery

        2.  When the third duplicate ACK is received, a TCP MUST set ssthresh
             to no more than the value given in equation (4).  When [RFC3042]
             is in use, additional data sent in limited transmit MUST NOT be
             included in this calculation.

             ssthresh equals to max (FlightSize / 2, 2 * SMSS)            (4)

             Violation: lwIP keeps ssthresh minimum to (8 * SMSS)

              where, as discussed above, FlightSize is the amount of outstanding
             data in the network.

            Violation: actually, the flight size is not calculated; the window
            size is used instead.
      */
      /* Set ssthresh to half of the minimum of the current
       * cwnd and the advertised window */
      pcb->ssthresh = (tcpwnd_size_t)(LWIP_MIN(pcb->cwnd, pcb->snd_wnd) / ((tcpwnd_size_t)2));
      pcb->ssthresh = (tcpwnd_size_t)(LWIP_MAX(pcb->ssthresh, (tcpwnd_size_t)(pcb->mss << 3))); /* floor: 8 * SMSS */

      /*
         3.  The lost segment starting at SND.UNA MUST be retransmitted and
             cwnd set to ssthresh plus 3*SMSS.  This artificially "inflates"
             the congestion window by the number of segments (three) that have
             left the network and which the receiver has buffered.
      */
      pcb->cwnd = pcb->ssthresh;
      TCP_WND_INC(pcb->cwnd, (tcpwnd_size_t)(3 * pcb->mss));

      tcp_set_flags(pcb, TF_INFR);
      tcp_set_flags(pcb, TF_INFR_FPACK);

      /* record recovery point if new Reno enabled */
      pcb->fast_recovery_point = pcb->snd_nxt;

      /* Reset the retransmission timer to prevent immediate rto retransmissions */
      pcb->rtime = 0;
    }
  }
}
2256 
2257 static struct pbuf *
tcp_output_alloc_header_common(u32_t ackno,u16_t optlen,u16_t datalen,u32_t seqno_be,u16_t src_port,u16_t dst_port,u8_t flags,u16_t wnd)2258 tcp_output_alloc_header_common(u32_t ackno, u16_t optlen, u16_t datalen,
2259                         u32_t seqno_be /* already in network byte order */,
2260                         u16_t src_port, u16_t dst_port, u8_t flags, u16_t wnd)
2261 {
2262   struct tcp_hdr *tcphdr;
2263   struct pbuf *p;
2264 
2265   p = pbuf_alloc(PBUF_IP, TCP_HLEN + optlen + datalen, PBUF_RAM);
2266   if (p != NULL) {
2267     LWIP_ASSERT("check that first pbuf can hold struct tcp_hdr",
2268                 (p->len >= TCP_HLEN + optlen));
2269     tcphdr = (struct tcp_hdr *)p->payload;
2270     tcphdr->src = lwip_htons(src_port);
2271     tcphdr->dest = lwip_htons(dst_port);
2272     tcphdr->seqno = seqno_be;
2273     tcphdr->ackno = lwip_htonl(ackno);
2274     TCPH_HDRLEN_FLAGS_SET(tcphdr, (5 + optlen / 4), flags);
2275     tcphdr->wnd = lwip_htons(wnd);
2276     tcphdr->chksum = 0;
2277     tcphdr->urgp = 0;
2278   }
2279   return p;
2280 }
2281 
2282 /** Allocate a pbuf and create a tcphdr at p->payload, used for output
2283  * functions other than the default tcp_output -> tcp_output_segment
2284  * (e.g. tcp_send_empty_ack, etc.)
2285  *
2286  * @param pcb tcp pcb for which to send a packet (used to initialize tcp_hdr)
2287  * @param optlen length of header-options
2288  * @param datalen length of tcp data to reserve in pbuf
2289  * @param seqno_be seqno in network byte order (big-endian)
2290  * @return pbuf with p->payload being the tcp_hdr
2291  */
2292 static struct pbuf *
tcp_output_alloc_header(struct tcp_pcb * pcb,u16_t optlen,u16_t datalen,u32_t seqno_be)2293 tcp_output_alloc_header(struct tcp_pcb *pcb, u16_t optlen, u16_t datalen,
2294                         u32_t seqno_be /* already in network byte order */)
2295 {
2296   struct pbuf *p;
2297 
2298   LWIP_ASSERT("tcp_output_alloc_header: invalid pcb", pcb != NULL);
2299 
2300   p = tcp_output_alloc_header_common(pcb->rcv_nxt, optlen, datalen,
2301     seqno_be, pcb->local_port, pcb->remote_port, TCP_ACK,
2302     TCPWND_MIN16(RCV_WND_SCALE(pcb, pcb->rcv_ann_wnd)));
2303   if (p != NULL) {
2304     /* If we're sending a packet, update the announced right window edge */
2305     pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd;
2306   }
2307   return p;
2308 }
2309 
/* Fill in options for control segments (header-only pbufs built by
 * tcp_output_alloc_header(): empty ACKs, keepalives, window probes).
 * 'optflags' selects which options to emit; 'num_sacks' is the number of
 * SACK blocks to encode when TF_SEG_OPTS_SACK is set. */
static void
tcp_output_fill_options(struct tcp_pcb *pcb, struct pbuf *p, u8_t optflags, u8_t num_sacks)
{
  struct tcp_hdr *tcphdr;
  u32_t *opts;

  LWIP_ASSERT("tcp_output_fill_options: invalid pbuf", p != NULL);

  tcphdr = (struct tcp_hdr *)p->payload;
  /* options start right after the fixed 20-byte TCP header */
  opts = (u32_t *)(void *)(tcphdr + 1);

  /* NB. MSS and window scale options are only sent on SYNs, so ignore them here */

#if LWIP_TCP_TIMESTAMPS
  if (optflags & TF_SEG_OPTS_TS) {
    tcp_build_timestamp_option(pcb, opts);
    opts += 3; /* the timestamp option occupies 3 32-bit words */
  }
#endif

#if LWIP_SACK
  if (optflags & TF_SEG_OPTS_SACK) {
    tcp_build_sack_option(pcb, num_sacks, opts);
    /* Zero can be given in the numerator when num_sacks is 0 */
    opts += (LWIP_TCP_SACK_OPT_LENGTH(num_sacks)) / sizeof(u32_t); /* number of words */
  }
#endif

  LWIP_UNUSED_ARG(num_sacks);

#ifdef LWIP_HOOK_TCP_OUT_ADD_TCPOPTS
  opts = LWIP_HOOK_TCP_OUT_ADD_TCPOPTS(p, tcphdr, pcb, opts);
#endif

  LWIP_UNUSED_ARG(pcb);
  LWIP_UNUSED_ARG(optflags); /* for LWIP_NOASSERT */
  LWIP_UNUSED_ARG(opts); /* for LWIP_NOASSERT */
}
2349 
2350 /** Output a control segment pbuf to IP.
2351  *
2352  * Called from tcp_rst, tcp_send_empty_ack, tcp_keepalive and tcp_zero_window_probe,
2353  * this function combines selecting a netif for transmission, generating the tcp
2354  * header checksum and calling ip_output_if while handling netif hints and stats.
2355  */
2356 static err_t
tcp_output_control_segment(const struct tcp_pcb * pcb,struct pbuf * p,const ip_addr_t * src,const ip_addr_t * dst)2357 tcp_output_control_segment(const struct tcp_pcb *pcb, struct pbuf *p,
2358                            const ip_addr_t *src, const ip_addr_t *dst)
2359 {
2360   err_t err;
2361   struct netif *netif;
2362 
2363   LWIP_ASSERT("tcp_output_control_segment: invalid pbuf", p != NULL);
2364 
2365   netif = tcp_route(pcb, src, dst);
2366   if (netif == NULL) {
2367     err = ERR_RTE;
2368   } else {
2369     u8_t ttl, tos;
2370 #if CHECKSUM_GEN_TCP
2371     IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
2372       struct tcp_hdr *tcphdr = (struct tcp_hdr *)p->payload;
2373       tcphdr->chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len,
2374                                         src, dst);
2375     }
2376 #endif
2377     if (pcb != NULL) {
2378       NETIF_SET_HINTS(netif, LWIP_CONST_CAST(struct netif_hint*, &(pcb->netif_hints)));
2379       ttl = pcb->ttl;
2380       tos = pcb->tos;
2381 #if LWIP_SO_DONTROUTE
2382       if (ip_get_option((struct ip_pcb *)pcb, SOF_DONTROUTE)) {
2383         p->flags |= PBUF_FLAG_IS_LINK_ONLY;
2384       }
2385 #endif /* LWIP_SO_DONTROUTE */
2386     } else {
2387       /* Send output with hardcoded TTL/HL since we have no access to the pcb */
2388       ttl = TCP_TTL;
2389       tos = 0;
2390     }
2391     TCP_STATS_INC(tcp.xmit);
2392 
2393 #if LWIP_SO_PRIORITY
2394     p->priority = LWIP_PKT_PRIORITY_MIN;
2395     if (pcb != NULL) {
2396       p->priority = pcb->priority;
2397     }
2398 #endif /* LWIP_SO_PRIORITY */
2399 
2400     err = ip_output_if(p, src, dst, ttl, tos, IP_PROTO_TCP, netif);
2401     NETIF_RESET_HINTS(netif);
2402   }
2403   pbuf_free(p);
2404   return err;
2405 }
2406 
2407 /**
2408  * Send a TCP RESET packet (empty segment with RST flag set) either to
2409  * abort a connection or to show that there is no matching local connection
2410  * for a received segment.
2411  *
2412  * Called by tcp_abort() (to abort a local connection), tcp_input() (if no
2413  * matching local pcb was found), tcp_listen_input() (if incoming segment
2414  * has ACK flag set) and tcp_process() (received segment in the wrong state)
2415  *
2416  * Since a RST segment is in most cases not sent for an active connection,
2417  * tcp_rst() has a number of arguments that are taken from a tcp_pcb for
2418  * most other segment output functions.
2419  *
2420  * @param pcb TCP pcb (may be NULL if no pcb is available)
2421  * @param seqno the sequence number to use for the outgoing segment
2422  * @param ackno the acknowledge number to use for the outgoing segment
2423  * @param local_ip the local IP address to send the segment from
2424  * @param remote_ip the remote IP address to send the segment to
2425  * @param local_port the local TCP port to send the segment from
2426  * @param remote_port the remote TCP port to send the segment to
2427  */
2428 void
tcp_rst(struct tcp_pcb * pcb,u32_t seqno,u32_t ackno,const ip_addr_t * local_ip,const ip_addr_t * remote_ip,u16_t local_port,u16_t remote_port)2429 tcp_rst(struct tcp_pcb *pcb, u32_t seqno, u32_t ackno,
2430         const ip_addr_t *local_ip, const ip_addr_t *remote_ip,
2431         u16_t local_port, u16_t remote_port)
2432 {
2433   struct pbuf *p;
2434   u16_t wnd;
2435   u8_t optlen;
2436 
2437   LWIP_ASSERT("tcp_rst: invalid local_ip", local_ip != NULL);
2438   LWIP_ASSERT("tcp_rst: invalid remote_ip", remote_ip != NULL);
2439 
2440   optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
2441 
2442 #if LWIP_WND_SCALE
2443   wnd = PP_HTONS(((TCP_WND >> TCP_RCV_SCALE) & 0xFFFF));
2444 #else
2445   wnd = PP_HTONS(TCP_WND);
2446 #endif
2447 
2448   p = tcp_output_alloc_header_common(ackno, optlen, 0, lwip_htonl(seqno), local_port,
2449     remote_port, TCP_RST | TCP_ACK, wnd);
2450   if (p == NULL) {
2451     LWIP_DEBUGF(TCP_DEBUG, ("tcp_rst: could not allocate memory for pbuf\n"));
2452     return;
2453   }
2454   tcp_output_fill_options(pcb, p, 0, 0);
2455 
2456   MIB2_STATS_INC(mib2.tcpoutrsts);
2457 
2458   tcp_output_control_segment(pcb, p, local_ip, remote_ip);
2459   LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_rst: seqno %"U32_F" ackno %"U32_F".\n", seqno, ackno));
2460 }
2461 
2462 /**
2463  * Send an ACK without data.
2464  *
2465  * @param pcb Protocol control block for the TCP connection to send the ACK
2466  */
2467 err_t
tcp_send_empty_ack(struct tcp_pcb * pcb)2468 tcp_send_empty_ack(struct tcp_pcb *pcb)
2469 {
2470   err_t err;
2471   struct pbuf *p;
2472   u8_t optlen, optflags = 0;
2473   u8_t num_sacks = 0;
2474 
2475   LWIP_ASSERT("tcp_send_empty_ack: invalid pcb", pcb != NULL);
2476 
2477 #if DRIVER_STATUS_CHECK
2478   if (pcb->drv_status == DRV_NOT_READY) {
2479     return ERR_RTE;
2480   }
2481 #endif
2482 
2483 #if LWIP_TCP_TIMESTAMPS
2484   if (pcb->flags & TF_TIMESTAMP) {
2485     optflags = TF_SEG_OPTS_TS;
2486   }
2487 #endif
2488   optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
2489 #if LWIP_SACK
2490   if (pcb->flags & TF_SACK) {
2491     num_sacks = tcp_get_sack_block_count_for_send(pcb, optlen);
2492     optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(num_sacks));
2493     optflags |= TF_SEG_OPTS_SACK;
2494   }
2495 #endif
2496 
2497   p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt));
2498   if (p == NULL) {
2499     /* let tcp_fasttmr retry sending this ACK */
2500     tcp_set_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
2501     LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: (ACK) could not allocate pbuf\n"));
2502     return ERR_BUF;
2503   }
2504   tcp_output_fill_options(pcb, p, optflags, num_sacks);
2505 
2506 #if LWIP_TCP_TIMESTAMPS
2507   pcb->ts_lastacksent = pcb->rcv_nxt;
2508 #endif
2509 
2510   LWIP_DEBUGF(TCP_OUTPUT_DEBUG,
2511               ("tcp_output: sending ACK for %"U32_F"\n", pcb->rcv_nxt));
2512   err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);
2513   if (err != ERR_OK) {
2514     /* let tcp_fasttmr retry sending this ACK */
2515     tcp_set_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
2516   } else {
2517     /* remove ACK flags from the PCB, as we sent an empty ACK now */
2518     tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
2519   }
2520 
2521   return err;
2522 }
2523 
2524 /**
2525  * Send keepalive packets to keep a connection active although
2526  * no data is sent over it.
2527  *
2528  * Called by tcp_slowtmr()
2529  *
2530  * @param pcb the tcp_pcb for which to send a keepalive packet
2531  */
2532 err_t
tcp_keepalive(struct tcp_pcb * pcb)2533 tcp_keepalive(struct tcp_pcb *pcb)
2534 {
2535   err_t err;
2536   struct pbuf *p;
2537   u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
2538 
2539   LWIP_ASSERT("tcp_keepalive: invalid pcb", pcb != NULL);
2540 
2541   LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: sending KEEPALIVE probe to "));
2542   ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip);
2543   LWIP_DEBUGF(TCP_DEBUG, ("\n"));
2544 
2545   LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: tcp_ticks %"U32_F"   pcb->tmr %"U32_F" pcb->keep_cnt_sent %"U16_F"\n",
2546                           tcp_ticks, pcb->tmr, (u16_t)pcb->keep_cnt_sent));
2547 
2548   p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt - 1));
2549   if (p == NULL) {
2550     LWIP_DEBUGF(TCP_DEBUG,
2551                 ("tcp_keepalive: could not allocate memory for pbuf\n"));
2552     return ERR_MEM;
2553   }
2554   tcp_output_fill_options(pcb, p, 0, 0);
2555   err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);
2556 
2557   LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: seqno %"U32_F" ackno %"U32_F" err %d.\n",
2558                           pcb->snd_nxt - 1, pcb->rcv_nxt, (int)err));
2559   return err;
2560 }
2561 
2562 /**
2563  * Send persist timer zero-window probes to keep a connection active
2564  * when a window update is lost.
2565  *
2566  * Called by tcp_slowtmr()
2567  *
2568  * @param pcb the tcp_pcb for which to send a zero-window probe packet
2569  */
2570 err_t
tcp_zero_window_probe(struct tcp_pcb * pcb)2571 tcp_zero_window_probe(struct tcp_pcb *pcb)
2572 {
2573   err_t err;
2574   struct pbuf *p;
2575   struct tcp_hdr *tcphdr;
2576   struct tcp_seg *seg;
2577   u8_t is_fin;
2578   u32_t seq;
2579   u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
2580 
2581   LWIP_ASSERT("tcp_zero_window_probe: invalid pcb", pcb != NULL);
2582 
2583   LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: sending ZERO WINDOW probe to "));
2584   ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip);
2585   LWIP_DEBUGF(TCP_DEBUG, ("\n"));
2586 
2587   LWIP_DEBUGF(TCP_DEBUG,
2588               ("tcp_zero_window_probe: tcp_ticks %"U32_F
2589                "   pcb->tmr %"U32_F" pcb->keep_cnt_sent %"U16_F"\n",
2590                tcp_ticks, pcb->tmr, (u16_t)pcb->keep_cnt_sent));
2591 
2592   /* Only consider unsent, persist timer should be off when there is data in-flight */
2593   seg = pcb->unsent;
2594   if (seg == NULL) {
2595     /* Not expected, persist timer should be off when the send buffer is empty */
2596     return ERR_OK;
2597   }
2598 
2599   /* increment probe count. NOTE: we record probe even if it fails
2600      to actually transmit due to an error. This ensures memory exhaustion/
2601      routing problem doesn't leave a zero-window pcb as an indefinite zombie.
2602      RTO mechanism has similar behavior, see pcb->nrtx */
2603   if (pcb->persist_probe < 0xFF) {
2604     ++pcb->persist_probe;
2605   }
2606 
2607   is_fin = ((TCPH_FLAGS(seg->tcphdr) & TCP_FIN) != 0) && (seg->len == 0);
2608   seq = is_fin ? seg->tcphdr->seqno : lwip_htonl(pcb->snd_nxt - 1);
2609   /* Change the behaviour of zero window probe from send 1 byte payload to 0 byte payload probe.
2610      The implementation is RFC non compliance(RFC793 section 3.7), but the behaviour is aligned with Linux
2611      It will help future code maintance. */
2612   /* we want to send one seqno: either FIN or data (no options) */
2613   p = tcp_output_alloc_header(pcb, optlen, 0, seq);
2614   if (p == NULL) {
2615     LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: no memory for pbuf\n"));
2616     return ERR_MEM;
2617   }
2618   tcphdr = (struct tcp_hdr *)p->payload;
2619 
2620   if (is_fin) {
2621     /* FIN segment, no data */
2622     TCPH_FLAGS_SET(tcphdr, TCP_ACK | TCP_FIN);
2623   }
2624 
2625   tcp_output_fill_options(pcb, p, 0, 0);
2626 
2627   err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);
2628 
2629   LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: seqno %"U32_F
2630                           " ackno %"U32_F" err %d.\n",
2631                           pcb->snd_nxt - 1, pcb->rcv_nxt, (int)err));
2632   return err;
2633 }
2634 
2635 #if DRIVER_STATUS_CHECK
2636 #if LWIP_SACK
/** Retransmit every unacked segment marked with the given SACK recovery type
 * that lies below the highest SACKed sequence number, clearing each segment's
 * type mark after it has been (re)transmitted.
 *
 * @param pcb the tcp_pcb whose unacked queue is scanned
 * @param sack_type segment type to flush (e.g. FAST_RETX_SEG)
 */
static void tcp_search_and_flush_sack_on_wake_queue(struct tcp_pcb *pcb, u32_t sack_type)
{
  struct tcp_seg *sack_seg = pcb->unacked;

  /* only segments below high_sacked are loss-recovery candidates */
  while ((sack_seg != NULL) && (TCP_SEQ_LT(lwip_ntohl(sack_seg->tcphdr->seqno), pcb->high_sacked))) {
    if (sack_seg->seg_type == sack_type) {
      /* use U32_F: u32_t may be wider than int, so %u/%d would be a
         format mismatch on some ports */
      LWIP_DEBUGF(DRV_STS_DEBUG, ("Retransmitting unsacked segment seq num(%"U32_F") of type %"U32_F"\n",
                                  lwip_ntohl(sack_seg->tcphdr->seqno), sack_type));
      /* flush packet and clear its recovery state so it is not flushed again */
      (void)tcp_output_segment(sack_seg, pcb, NULL);
      sack_seg->seg_type = SEG_TYPE_NONE;
    }
    sack_seg = sack_seg->next;
  }
}
2653 #endif
2654 
/**
 * Record a new driver status for a pcb and flush everything that was held
 * back: a pending RST, delayed ACKs, and queued/retransmittable segments.
 *
 * NOTE(review): presumably invoked when the underlying network driver
 * becomes ready again (wake queue) -- confirm against the caller.
 *
 * @param pcb the tcp_pcb to flush
 * @param status new driver status, stored into pcb->drv_status
 */
void tcp_flush_pcb_on_wake_queue(struct tcp_pcb *pcb, u8_t status)
{
  /* update driver status for that PCB */
  pcb->drv_status = status;

  LWIP_DEBUGF(DRV_STS_DEBUG, ("Flushing PCB  Local Port: %u) on Wake Queue\n", pcb->local_port));
  ip_addr_debug_print_val(DRV_STS_DEBUG, pcb->local_ip);

  /* a reset requested while the driver was down takes precedence over all
     other output; nothing else is sent for this pcb */
  if (pcb->flags & TF_RST_ON_DRV_WAKE) {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Sending Reset on Wake Queue\n"));
    tcp_rst(pcb, pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip,
            pcb->local_port, pcb->remote_port);
    return;
  }

  /* Send empty ACK if no data packets */
  if ((pcb->unsent == NULL) && (pcb->flags & (TF_ACK_NOW | TF_ACK_DELAY))) {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Sending Empty ACK on Wake Queue\n"));
    (void)tcp_send_empty_ack(pcb);
  }

  /* If Data Piggy back is disabled, then send SACK Options in empty ACK */
#if !LWIP_SACK_DATA_SEG_PIGGYBACK
  if (pcb->ooseq != NULL) {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Sending Empty ACK on Wake Queue\n"));
    (void)tcp_send_empty_ack(pcb);
  }
#endif

#if LWIP_SACK
  /* flush all unsacked packets as per loss recovery algorithm */
  if (pcb->unacked) {
    u32_t next_type = FAST_RETX_SEG;
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Going to flush UnackedQueue\n"));
    /* walk the recovery segment types one by one in priority order:
       FAST_RETX -> UNSACKED_AND_LOST -> (tcp_output) -> UNSENT ->
       UNSACKED -> RESCUE_RX; the switch below drives that sequence */
    do {
      tcp_search_and_flush_sack_on_wake_queue(pcb, next_type);
      switch (next_type) {
        case FAST_RETX_SEG:
          next_type = UNSACKED_AND_LOST_SEG;
          break;
        case UNSACKED_AND_LOST_SEG:
          next_type = UNSENT_SEG;
          /* flush tcp queue */
          (void)tcp_output(pcb);
          /* fall-through */
        case UNSENT_SEG:
          next_type = UNSACKED_SEG;
          break;
        case UNSACKED_SEG:
          next_type = RESCUE_RX_SEG;
          break;
        case RESCUE_RX_SEG:
          next_type = SEG_TYPE_NONE;
          /* fall-through */
        default:
          break;
      }

      /* break loop if seg_type is none */
    } while (next_type != SEG_TYPE_NONE);
    LWIP_DEBUGF(DRV_STS_DEBUG, ("UnackedQueue flushed\n"));
  } else {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Going to flush TCP unsent\n"));
    /* flush tcp queue */
    (void)tcp_output(pcb);
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Flushed TCP unsent\n"));
  }
#else
  LWIP_DEBUGF(DRV_STS_DEBUG, ("Going to flush TCP unsent\n"));
  /* Flush unsent segments */
  (void)tcp_output(pcb);
  LWIP_DEBUGF(DRV_STS_DEBUG, ("Flushed TCP unsent\n"));
#endif

  return;
}
2732 #endif
2733 
2734 #endif /* LWIP_TCP */
2735