/**
 * @file
 * Transmission Control Protocol, outgoing traffic
 *
 * The output functions of TCP.
 *
 * There are two distinct ways for TCP segments to get sent:
 * - queued data: these are segments transferring data or segments containing
 *   SYN or FIN (which both count as one sequence number). They are created as
 *   struct @ref pbuf together with a struct tcp_seg and enqueued on the
 *   unsent list of the pcb. They are sent by tcp_output:
 *   - @ref tcp_write : creates data segments
 *   - @ref tcp_split_unsent_seg : splits a data segment
 *   - @ref tcp_enqueue_flags : creates SYN-only or FIN-only segments
 *   - @ref tcp_output / tcp_output_segment : finalize the tcp header
 *     (e.g. sequence numbers, options, checksum) and output to IP
 *   - the various tcp_rexmit functions shuffle around segments between the
 *     unsent and unacked lists to retransmit them
 *   - tcp_create_segment and tcp_pbuf_prealloc allocate pbuf and
 *     segment for these functions
 * - direct send: these segments don't contain data but control the connection
 *   behaviour. They are created as pbuf only and sent directly without
 *   enqueueing them:
 *   - @ref tcp_send_empty_ack sends an ACK-only segment
 *   - @ref tcp_rst sends a RST segment
 *   - @ref tcp_keepalive sends a keepalive segment
 *   - @ref tcp_zero_window_probe sends a window probe segment
 *   - tcp_output_alloc_header allocates a header-only pbuf for these functions
 */
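
/* For illustration, a minimal raw-API usage sketch of the queued-data path
 * (application code, not part of this file; assumes an already-connected pcb
 * and that the caller runs in the lwIP core context):
 *
 *   err_t send_greeting(struct tcp_pcb *pcb)
 *   {
 *     const char msg[] = "hello";
 *     err_t err = tcp_write(pcb, msg, sizeof(msg) - 1, TCP_WRITE_FLAG_COPY);
 *     if (err == ERR_OK) {
 *       err = tcp_output(pcb);
 *     }
 *     return err;
 *   }
 */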

/*
 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 *
 * This file is part of the lwIP TCP/IP stack.
 *
 * Author: Adam Dunkels <adam@sics.se>
 *
 */

#include "lwip/opt.h"

#if LWIP_TCP /* don't build if not configured for use in lwipopts.h */

#include "lwip/priv/tcp_priv.h"
#include "lwip/def.h"
#include "lwip/mem.h"
#include "lwip/memp.h"
#include "lwip/ip_addr.h"
#include "lwip/netif.h"
#include "lwip/inet_chksum.h"
#include "lwip/stats.h"
#include "lwip/ip6.h"
#include "lwip/ip6_addr.h"
#include "lwip/sys.h"
#include "lwip/tcp_sack.h"

#include <string.h>

#ifdef LWIP_HOOK_FILENAME
#include LWIP_HOOK_FILENAME
#endif

/* Custom TCP header options can be added by defining this hook */
#ifdef LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH
#define LWIP_TCP_OPT_LENGTH_SEGMENT(flags, pcb) LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH(pcb, LWIP_TCP_OPT_LENGTH(flags))
#else
#define LWIP_TCP_OPT_LENGTH_SEGMENT(flags, pcb) LWIP_TCP_OPT_LENGTH(flags)
#endif
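
/* As an illustration (hypothetical names), such a hook could be set up from
 * lwipopts.h; it receives the pcb and the option length lwIP itself needs and
 * must return the total number of option bytes to reserve:
 *
 *   #define LWIP_HOOK_FILENAME "my_tcp_hooks.h"
 *   #define LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH(pcb, internal_len) \
 *     my_tcpopt_length((pcb), (internal_len))
 */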

/* Define some copy-macros for checksum-on-copy so that the code looks
   nicer by preventing too many ifdef's. */
#if TCP_CHECKSUM_ON_COPY
#define TCP_DATA_COPY(dst, src, len, seg) do { \
  tcp_seg_add_chksum(LWIP_CHKSUM_COPY(dst, src, len), \
                     len, &seg->chksum, &seg->chksum_swapped); \
  seg->flags |= TF_SEG_DATA_CHECKSUMMED; } while(0)
#define TCP_DATA_COPY2(dst, src, len, chksum, chksum_swapped) \
  tcp_seg_add_chksum(LWIP_CHKSUM_COPY(dst, src, len), len, chksum, chksum_swapped);
#else /* TCP_CHECKSUM_ON_COPY*/
#define TCP_DATA_COPY(dst, src, len, seg)                     MEMCPY(dst, src, len)
#define TCP_DATA_COPY2(dst, src, len, chksum, chksum_swapped) MEMCPY(dst, src, len)
#endif /* TCP_CHECKSUM_ON_COPY*/

/** Define this to 1 for an extra check that the output checksum is valid
 * (useful when the checksum is generated by the application, not the stack) */
#ifndef TCP_CHECKSUM_ON_COPY_SANITY_CHECK
#define TCP_CHECKSUM_ON_COPY_SANITY_CHECK 0
#endif
/* Allows overriding the sanity-check failure handling, e.g. from a warning to a hard failure */
#if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
#ifndef TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL
#define TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL(msg) LWIP_DEBUGF(TCP_DEBUG | LWIP_DBG_LEVEL_WARNING, msg)
#endif
#endif
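
/* For example (illustrative only), an application that wants a checksum
 * mismatch to be fatal could define in lwipopts.h:
 *
 *   #define TCP_CHECKSUM_ON_COPY_SANITY_CHECK 1
 *   #define TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL(msg) \
 *     LWIP_ASSERT("tcp output checksum mismatch", 0)
 */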

#if TCP_OVERSIZE
/** The size of segment pbufs created when TCP_OVERSIZE is enabled */
#ifndef TCP_OVERSIZE_CALC_LENGTH
#define TCP_OVERSIZE_CALC_LENGTH(length) ((length) + TCP_OVERSIZE)
#endif
#endif
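
/* A sketch of a possible override (in lwipopts.h, illustrative values): round
 * oversized segment pbuf allocations up to a hypothetical pool block size so
 * the extra tail bytes are not wasted:
 *
 *   #define TCP_OVERSIZE_CALC_LENGTH(length) (((length) + 255u) & ~255u)
 */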

/* tcp_route: common code that returns a fixed bound netif or calls ip_route */
static struct netif *
tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst)
{
  LWIP_UNUSED_ARG(src); /* in case IPv4-only and source-based routing is disabled */

  if ((pcb != NULL) && (pcb->netif_idx != NETIF_NO_INDEX)) {
    return netif_get_by_index(pcb->netif_idx);
  } else {
    if (pcb == NULL) {
      return ip_route(src, dst);
    } else {
      return ip_route_pcb(dst, (struct ip_pcb *)pcb);
    }
  }
}

/**
 * Create a TCP segment with prefilled header.
 *
 * Called by @ref tcp_write, @ref tcp_enqueue_flags and @ref tcp_split_unsent_seg
 *
 * @param pcb Protocol control block for the TCP connection.
 * @param p pbuf that is used to hold the TCP header.
 * @param hdrflags TCP flags for header.
 * @param seqno TCP sequence number of this packet
 * @param optflags options to include in TCP header
 * @return a new tcp_seg pointing to p, or NULL.
 * The TCP header is filled in except ackno and wnd.
 * p is freed on failure.
 */
static struct tcp_seg *
tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
{
  struct tcp_seg *seg;
  u8_t optlen = (u8_t)(LWIP_TCP_OPT_LENGTH(optflags));

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  u8_t cnt = 0;

  if (optflags & TF_SEG_OPTS_SACK_OPTIONS) {
    cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
    optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
  }
#endif

  if ((seg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG)) == NULL) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_create_segment: no memory.\n"));
    pbuf_free(p);
    return NULL;
  }
  seg->flags = optflags;
  seg->next = NULL;
  seg->p = p;
  LWIP_ASSERT("p->tot_len >= optlen", p->tot_len >= optlen);
  seg->len = p->tot_len - optlen;

#if LWIP_SACK
  tcp_update_sack_fields_for_new_seg(seg);
#endif /* LWIP_SACK */

#if TCP_OVERSIZE_DBGCHECK
  seg->oversize_left = 0;
#endif /* TCP_OVERSIZE_DBGCHECK */
#if TCP_CHECKSUM_ON_COPY
  seg->chksum = 0;
  seg->chksum_swapped = 0;
  /* check optflags */
  LWIP_ASSERT("invalid optflags passed: TF_SEG_DATA_CHECKSUMMED",
              (optflags & TF_SEG_DATA_CHECKSUMMED) == 0);
#endif /* TCP_CHECKSUM_ON_COPY */

#if LWIP_SACK && DRIVER_STATUS_CHECK
  seg->seg_type = SEG_TYPE_NONE;
#endif

  /* build TCP header */
  if (pbuf_add_header(p, TCP_HLEN)) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_create_segment: no room for TCP header in pbuf.\n"));
    TCP_STATS_INC(tcp.err);
    tcp_seg_free(seg);
    return NULL;
  }
  seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
  seg->tcphdr->src = lwip_htons(pcb->local_port);
  seg->tcphdr->dest = lwip_htons(pcb->remote_port);
  seg->tcphdr->seqno = lwip_htonl(seqno);
  /* ackno is set in tcp_output */
  TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (5 + optlen / 4), hdrflags);
  /* wnd and chksum are set in tcp_output */
  seg->tcphdr->urgp = 0;
  return seg;
}

/**
 * Allocate a PBUF_RAM pbuf, perhaps with extra space at the end.
 *
 * This function is like pbuf_alloc(layer, length, PBUF_RAM) except
 * there may be extra bytes available at the end.
 *
 * Called by @ref tcp_write
 *
 * @param layer flag to define header size.
 * @param length size of the pbuf's payload.
 * @param max_length maximum usable size of payload+oversize.
 * @param oversize pointer to a u16_t that will receive the number of usable tail bytes.
 * @param pcb The TCP connection that will enqueue the pbuf.
 * @param apiflags API flags given to tcp_write.
 * @param first_seg true when this pbuf will be used in the first enqueued segment.
 */
#if TCP_OVERSIZE
static struct pbuf *
tcp_pbuf_prealloc(pbuf_layer layer, u16_t length, u16_t max_length,
                  u16_t *oversize, const struct tcp_pcb *pcb, u8_t apiflags,
                  u8_t first_seg)
{
  struct pbuf *p;
  u16_t alloc = length;

  LWIP_ASSERT("tcp_pbuf_prealloc: invalid oversize", oversize != NULL);
  LWIP_ASSERT("tcp_pbuf_prealloc: invalid pcb", pcb != NULL);

#if LWIP_NETIF_TX_SINGLE_PBUF
  LWIP_UNUSED_ARG(max_length);
  LWIP_UNUSED_ARG(pcb);
  LWIP_UNUSED_ARG(apiflags);
  LWIP_UNUSED_ARG(first_seg);
  alloc = max_length;
#else /* LWIP_NETIF_TX_SINGLE_PBUF */
  if (length < max_length) {
    /* Should we allocate an oversized pbuf, or just the minimum
     * length required? If tcp_write is going to be called again
     * before this segment is transmitted, we want the oversized
     * buffer. If the segment will be transmitted immediately, we can
     * save memory by allocating only length. We use a simple
     * heuristic based on the following information:
     *
     * Did the user set TCP_WRITE_FLAG_MORE?
     *
     * Will the Nagle algorithm defer transmission of this segment?
     */
    if ((apiflags & TCP_WRITE_FLAG_MORE) ||
        (!(pcb->flags & TF_NODELAY) &&
         (!first_seg ||
          pcb->unsent != NULL ||
          pcb->unacked != NULL))) {
      alloc = LWIP_MIN(max_length, LWIP_MEM_ALIGN_SIZE(TCP_OVERSIZE_CALC_LENGTH(length)));
    }
  }
#endif /* LWIP_NETIF_TX_SINGLE_PBUF */
  p = pbuf_alloc(layer, alloc, PBUF_RAM);
  if (p == NULL) {
    return NULL;
  }
  LWIP_ASSERT("need unchained pbuf", p->next == NULL);
  *oversize = p->len - length;
  /* trim p->len to the currently used size */
  p->len = p->tot_len = length;
  return p;
}
#else /* TCP_OVERSIZE */
#define tcp_pbuf_prealloc(layer, length, mx, os, pcb, api, fst) pbuf_alloc((layer), (length), PBUF_RAM)
#endif /* TCP_OVERSIZE */

#if TCP_CHECKSUM_ON_COPY
/** Add a checksum of newly added data to the segment.
 *
 * Called by tcp_write and tcp_split_unsent_seg.
 *
 * If the added chunk has an odd length, the stored checksum is byte-swapped
 * so that the next chunk's 16-bit words pair up at the correct byte offset;
 * a second odd-length chunk swaps it back.
 */
static void
tcp_seg_add_chksum(u16_t chksum, u16_t len, u16_t *seg_chksum,
                   u8_t *seg_chksum_swapped)
{
  u32_t helper;
  /* add chksum to old chksum and fold to u16_t */
  helper = chksum + *seg_chksum;
  chksum = FOLD_U32T(helper);
  if ((len & 1) != 0) {
    *seg_chksum_swapped = 1 - *seg_chksum_swapped;
    chksum = SWAP_BYTES_IN_WORD(chksum);
  }
  *seg_chksum = chksum;
}
#endif /* TCP_CHECKSUM_ON_COPY */

/** Checks if tcp_write is allowed or not (checks state, snd_buf and snd_queuelen).
 *
 * @param pcb the tcp pcb to check for
 * @param len length of data to send (checked against snd_buf)
 * @return ERR_OK if tcp_write is allowed to proceed, another err_t otherwise
 */
static err_t
tcp_write_checks(struct tcp_pcb *pcb, u16_t len)
{
  LWIP_ASSERT("tcp_write_checks: invalid pcb", pcb != NULL);

  /* connection is in invalid state for data transmission? */
  if ((pcb->state != ESTABLISHED) &&
      (pcb->state != CLOSE_WAIT) &&
      (pcb->state != SYN_SENT) &&
      (pcb->state != SYN_RCVD)) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_STATE | LWIP_DBG_LEVEL_SEVERE, ("tcp_write() called in invalid state\n"));
    return (pcb->state > ESTABLISHED) ? ERR_PIPE : ERR_CONN;
  } else if (len == 0) {
    return ERR_OK;
  }

  /* fail on too much data */
  if (len > pcb->snd_buf) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE, ("tcp_write: too much data (len=%"U16_F" > snd_buf=%"U32_F")\n",
                len, pcb->snd_buf));
    tcp_set_flags(pcb, TF_NAGLEMEMERR);
    return ERR_MEM;
  }

  LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: queuelen: %"TCPWNDSIZE_F"\n", (tcpwnd_size_t)pcb->snd_queuelen));

  /* If total number of pbufs on the unsent/unacked queues exceeds the
   * configured maximum, return an error */
  /* check for configured max queuelen and possible overflow */
  if ((pcb->snd_queuelen >= pcb->snd_queuelen_max) || (pcb->snd_queuelen > TCP_SNDQUEUELEN_OVERFLOW)) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE,
                ("tcp_write: too long queue %"TCPWNDSIZE_F" (max %"TCPWNDSIZE_F")\n",
                 pcb->snd_queuelen, pcb->snd_queuelen_max));
    TCP_STATS_INC(tcp.memerr);
    tcp_set_flags(pcb, TF_NAGLEMEMERR);
    return ERR_MEM;
  }
  if (pcb->snd_queuelen != 0) {
    LWIP_ASSERT("tcp_write: pbufs on queue => at least one queue non-empty",
                pcb->unacked != NULL || pcb->unsent != NULL);
  } else {
    LWIP_ASSERT("tcp_write: no pbufs on queue => both queues empty",
                pcb->unacked == NULL && pcb->unsent == NULL);
  }
  return ERR_OK;
}

/**
 * @ingroup tcp_raw
 * Write data for sending (but does not send it immediately).
 *
 * It waits in the expectation of more data being sent soon (since
 * data can be sent more efficiently when segments are combined).
 * To prompt the system to send data now, call tcp_output() after
 * calling tcp_write().
 *
 * This function enqueues the data pointed to by the argument dataptr. The length of
 * the data is passed as the len parameter. The apiflags can be one or more of:
 * - TCP_WRITE_FLAG_COPY: indicates whether new memory should be allocated
 *   for the data to be copied into. If this flag is not given, no new memory
 *   is allocated and the data is only referenced by pointer. This
 *   also means that the memory behind dataptr must not change until the data is
 *   ACKed by the remote host
 * - TCP_WRITE_FLAG_MORE: indicates that more data follows. If this is omitted,
 *   the PSH flag is set in the last segment created by this call to tcp_write.
 *   If this flag is given, the PSH flag is not set.
 *
 * The tcp_write() function will fail and return ERR_MEM if the length
 * of the data exceeds the current send buffer size or if the length of
 * the queue of outgoing segments is larger than the upper limit defined
 * in lwipopts.h. The number of bytes available in the output queue can
 * be retrieved with the tcp_sndbuf() function.
 *
 * The proper way to use this function is to call the function with at
 * most tcp_sndbuf() bytes of data. If the function returns ERR_MEM,
 * the application should wait until some of the currently enqueued
 * data has been successfully received by the other host and try again.
 * @param pcb Protocol control block for the TCP connection to enqueue data for.
 * @param arg Pointer to the data to be enqueued for sending.
 * @param len Data length in bytes
 * @param apiflags combination of the following flags:
 * - TCP_WRITE_FLAG_COPY (0x01) data will be copied into memory belonging to the stack
 * - TCP_WRITE_FLAG_MORE (0x02) for TCP connection, PSH flag will not be set on last segment sent
 * @return ERR_OK if enqueued, another err_t on error
 */
err_t
tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags)
{
  struct pbuf *concat_p = NULL;
  struct tcp_seg *last_unsent = NULL, *seg = NULL, *prev_seg = NULL, *queue = NULL;
  u16_t pos = 0; /* position in 'arg' data */
  tcpwnd_size_t queuelen;
  u8_t optlen;
  u8_t optflags = 0;
#if TCP_OVERSIZE
  u16_t oversize = 0;
  u16_t oversize_used = 0;
#if TCP_OVERSIZE_DBGCHECK
  u16_t oversize_add = 0;
#endif /* TCP_OVERSIZE_DBGCHECK*/
#endif /* TCP_OVERSIZE */
  u16_t extendlen = 0;
#if TCP_CHECKSUM_ON_COPY
  u16_t concat_chksum = 0;
  u8_t concat_chksum_swapped = 0;
  u16_t concat_chksummed = 0;
#endif /* TCP_CHECKSUM_ON_COPY */
  err_t err;
  u16_t mss_local;
#if LWIP_SACK_DATA_SEG_PIGGYBACK
  u8_t cnt = 0;
#endif

  LWIP_ERROR("tcp_write: invalid pcb", pcb != NULL, return ERR_ARG);

  /* don't allocate segments bigger than half the maximum window we ever received */
  mss_local = LWIP_MIN(pcb->mss, TCPWND_MIN16(pcb->snd_wnd_max / 2));
  mss_local = mss_local ? mss_local : pcb->mss;

  LWIP_ASSERT_CORE_LOCKED();

#if LWIP_NETIF_TX_SINGLE_PBUF
  /* Always copy to try to create single pbufs for TX */
  apiflags |= TCP_WRITE_FLAG_COPY;
#endif /* LWIP_NETIF_TX_SINGLE_PBUF */

  LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_write(pcb=%p, data=%p, len=%"U16_F", apiflags=%"U16_F")\n",
                                 (void *)pcb, arg, len, (u16_t)apiflags));
  LWIP_ERROR("tcp_write: arg == NULL && len > 0 (programmer violates API)",
             (arg != NULL) || (len == 0), return ERR_ARG;);

  err = tcp_write_checks(pcb, len);
  if (err != ERR_OK) {
    return err;
  }
  queuelen = pcb->snd_queuelen;

#if LWIP_TCP_TIMESTAMPS
  if ((pcb->flags & TF_TIMESTAMP)) {
    /* Make sure the timestamp option is only included in data segments if we
       agreed about it with the remote host. */
    optflags = TF_SEG_OPTS_TS;
    optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(TF_SEG_OPTS_TS, pcb);
    /* ensure that segments can hold at least one data byte... */
    mss_local = LWIP_MAX(mss_local, LWIP_TCP_OPT_LEN_TS + 1);
  } else
#endif /* LWIP_TCP_TIMESTAMPS */
  {
    optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
  }

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  if (pcb->flags & TF_SACK) {
    cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
    if (cnt) {
      optflags |= TF_SEG_OPTS_SACK_OPTIONS;
      optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
    }
  }
#endif

  /*
   * TCP segmentation is done in three phases with increasing complexity:
   *
   * 1. Copy data directly into an oversized pbuf.
   * 2. Chain a new pbuf to the end of pcb->unsent.
   * 3. Create new segments.
   *
   * We may run out of memory at any point. In that case we must
   * return ERR_MEM and not change anything in pcb. Therefore, all
   * changes are recorded in local variables and committed at the end
   * of the function. Some pcb fields are maintained in local copies:
   *
   * queuelen = pcb->snd_queuelen
   * oversize = pcb->unsent_oversize
   *
   * These variables are set consistently by the phases:
   *
   * seg points to the last segment tampered with.
   *
   * pos records progress as data is segmented.
   */

  /* Find the tail of the unsent queue. */
  if (pcb->unsent != NULL) {
    u16_t space;
    u16_t unsent_optlen;

    /* @todo: this could be sped up by keeping last_unsent in the pcb */
    for (last_unsent = pcb->unsent; last_unsent->next != NULL;
         last_unsent = last_unsent->next);

    /* Usable space at the end of the last unsent segment */
    unsent_optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(last_unsent->flags, pcb);

#if LWIP_SACK_DATA_SEG_PIGGYBACK
    if (last_unsent->flags & TF_SEG_OPTS_SACK_OPTIONS) {
      cnt = tcp_get_sack_block_count_for_send(pcb, (u8_t)unsent_optlen);
      /*
         The number of SACK blocks already on this segment cannot be predicted
         here, so only add the SACK option length if it still fits within
         mss_local. This sends whatever SACK blocks fit on the wire in the
         available space and avoids an underflow when computing 'space' below.
       */
      if (mss_local >= (last_unsent->len + unsent_optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt))) {
        unsent_optlen = (u8_t)(unsent_optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
      }
    }
#endif

    LWIP_ASSERT("mss_local is too small", mss_local >= last_unsent->len + unsent_optlen);
    space = mss_local - (last_unsent->len + unsent_optlen);

    /*
     * Phase 1: Copy data directly into an oversized pbuf.
     *
     * The number of bytes copied is recorded in the oversize_used
     * variable. The actual copying is done at the bottom of the
     * function.
     */
#if TCP_OVERSIZE
#if TCP_OVERSIZE_DBGCHECK
    /* check that pcb->unsent_oversize matches last_unsent->oversize_left */
    LWIP_ASSERT("unsent_oversize mismatch (pcb vs. last_unsent)",
                pcb->unsent_oversize == last_unsent->oversize_left);
#endif /* TCP_OVERSIZE_DBGCHECK */
    oversize = pcb->unsent_oversize;
    if (oversize > 0) {
      LWIP_ASSERT("inconsistent oversize vs. space", oversize <= space);
      seg = last_unsent;
      oversize_used = LWIP_MIN(space, LWIP_MIN(oversize, len));
      pos += oversize_used;
      oversize -= oversize_used;
      space -= oversize_used;
    }
    /* now we are either finished or oversize is zero */
    LWIP_ASSERT("inconsistent oversize vs. len", (oversize == 0) || (pos == len));
#endif /* TCP_OVERSIZE */

#if !LWIP_NETIF_TX_SINGLE_PBUF
    /*
     * Phase 2: Chain a new pbuf to the end of pcb->unsent.
     *
     * As an exception when NOT copying the data, if the given data buffer
     * directly follows the last unsent data buffer in memory, extend the last
     * ROM pbuf reference to the buffer, thus saving a ROM pbuf allocation.
     *
     * We don't extend segments containing SYN/FIN flags or options
     * (len==0). The new pbuf is kept in concat_p and pbuf_cat'ed at
     * the end.
     *
     * This phase is skipped for LWIP_NETIF_TX_SINGLE_PBUF as we could only execute
     * it after rexmit puts a segment from unacked to unsent and at this point,
     * oversize info is lost.
     */
#if LWIP_SACK
    if ((last_unsent->len >= mss_local) && (pos < len) && (space > 0) && (last_unsent->len > 0))
#else
    if ((pos < len) && (space > 0) && (last_unsent->len > 0))
#endif
    {
      u16_t seglen = LWIP_MIN(space, len - pos);
      seg = last_unsent;

      /* Create a pbuf with a copy or reference to seglen bytes. We
       * can use PBUF_RAW here since the data appears in the middle of
       * a segment. A header will never be prepended. */
      if (apiflags & TCP_WRITE_FLAG_COPY) {
        /* Data is copied */
        if ((concat_p = tcp_pbuf_prealloc(PBUF_RAW, seglen, space, &oversize, pcb, apiflags, 1)) == NULL) {
          LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
                      ("tcp_write : could not allocate memory for pbuf copy size %"U16_F"\n",
                       seglen));
          goto memerr;
        }
#if TCP_OVERSIZE_DBGCHECK
        oversize_add = oversize;
#endif /* TCP_OVERSIZE_DBGCHECK */
        TCP_DATA_COPY2(concat_p->payload, (const u8_t *)arg + pos, seglen, &concat_chksum, &concat_chksum_swapped);
#if TCP_CHECKSUM_ON_COPY
        concat_chksummed += seglen;
#endif /* TCP_CHECKSUM_ON_COPY */
        queuelen += pbuf_clen(concat_p);
      } else {
        /* Data is not copied */
        /* If the last unsent pbuf is of type PBUF_ROM, try to extend it. */
        struct pbuf *p;
        for (p = last_unsent->p; p->next != NULL; p = p->next);
        if (((p->type_internal & (PBUF_TYPE_FLAG_STRUCT_DATA_CONTIGUOUS | PBUF_TYPE_FLAG_DATA_VOLATILE)) == 0) &&
            (const u8_t *)p->payload + p->len == (const u8_t *)arg) {
          LWIP_ASSERT("tcp_write: ROM pbufs cannot be oversized", pos == 0);
          extendlen = seglen;
        } else {
          if ((concat_p = pbuf_alloc(PBUF_RAW, seglen, PBUF_ROM)) == NULL) {
            LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
                        ("tcp_write: could not allocate memory for zero-copy pbuf\n"));
            goto memerr;
          }
          /* reference the non-volatile payload data */
          ((struct pbuf_rom *)concat_p)->payload = (const u8_t *)arg + pos;
          queuelen += pbuf_clen(concat_p);
        }
#if TCP_CHECKSUM_ON_COPY
        /* calculate the checksum of nocopy-data */
        tcp_seg_add_chksum(~inet_chksum((const u8_t *)arg + pos, seglen), seglen,
                           &concat_chksum, &concat_chksum_swapped);
        concat_chksummed += seglen;
#endif /* TCP_CHECKSUM_ON_COPY */
      }

      pos += seglen;
    }
#endif /* !LWIP_NETIF_TX_SINGLE_PBUF */
  } else {
#if TCP_OVERSIZE
    LWIP_ASSERT("unsent_oversize mismatch (pcb->unsent is NULL)",
                pcb->unsent_oversize == 0);
#endif /* TCP_OVERSIZE */
  }

  /*
   * Phase 3: Create new segments.
   *
   * The new segments are chained together in the local 'queue'
   * variable, ready to be appended to pcb->unsent.
   */
  while (pos < len) {
    struct pbuf *p;
    u16_t left = len - pos;
    u16_t max_len = mss_local - optlen;
    u16_t seglen = LWIP_MIN(left, max_len);
#if TCP_CHECKSUM_ON_COPY
    u16_t chksum = 0;
    u8_t chksum_swapped = 0;
#endif /* TCP_CHECKSUM_ON_COPY */

    if (apiflags & TCP_WRITE_FLAG_COPY) {
      /* If copy is set, memory should be allocated and data copied
       * into pbuf */
      if ((p = tcp_pbuf_prealloc(PBUF_TRANSPORT, seglen + optlen, mss_local, &oversize, pcb, apiflags, queue == NULL)) == NULL) {
        LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write : could not allocate memory for pbuf copy size %"U16_F"\n", seglen));
        goto memerr;
      }
      LWIP_ASSERT("tcp_write: check that first pbuf can hold the complete seglen",
                  (p->len >= seglen));
      TCP_DATA_COPY2((char *)p->payload + optlen, (const u8_t *)arg + pos, seglen, &chksum, &chksum_swapped);
    } else {
      /* Copy is not set: First allocate a pbuf for holding the data.
       * Since the referenced data is available at least until it is
       * sent out on the link (as it has to be ACKed by the remote
       * party) we can safely use PBUF_ROM instead of PBUF_REF here.
       */
      struct pbuf *p2;
#if TCP_OVERSIZE
      LWIP_ASSERT("oversize == 0", oversize == 0);
#endif /* TCP_OVERSIZE */
      if ((p2 = pbuf_alloc(PBUF_TRANSPORT, seglen, PBUF_ROM)) == NULL) {
        LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: could not allocate memory for zero-copy pbuf\n"));
        goto memerr;
      }
#if TCP_CHECKSUM_ON_COPY
      /* calculate the checksum of nocopy-data */
      chksum = ~inet_chksum((const u8_t *)arg + pos, seglen);
      if (seglen & 1) {
        chksum_swapped = 1;
        chksum = SWAP_BYTES_IN_WORD(chksum);
      }
#endif /* TCP_CHECKSUM_ON_COPY */
      /* reference the non-volatile payload data */
      ((struct pbuf_rom *)p2)->payload = (const u8_t *)arg + pos;

      /* Second, allocate a pbuf for the headers. */
      if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RAM)) == NULL) {
        /* If allocation fails, we have to deallocate the data pbuf as
         * well. */
        pbuf_free(p2);
        LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: could not allocate memory for header pbuf\n"));
        goto memerr;
      }
      /* Concatenate the headers and data pbufs together. */
      pbuf_cat(p/*header*/, p2/*data*/);
    }

    queuelen += pbuf_clen(p);

    /* Now that there are more segments queued, we check again if the
     * length of the queue exceeds the configured maximum or
     * overflows. */
    if ((queuelen > pcb->snd_queuelen_max) || (queuelen > TCP_SNDQUEUELEN_OVERFLOW)) {
      LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
                  ("tcp_write: queue too long %"TCPWNDSIZE_F" (%"TCPWNDSIZE_F")\n",
                   queuelen, pcb->snd_queuelen_max));
      pbuf_free(p);
      goto memerr;
    }

    if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) {
      goto memerr;
    }
#if TCP_OVERSIZE_DBGCHECK
    seg->oversize_left = oversize;
#endif /* TCP_OVERSIZE_DBGCHECK */
#if TCP_CHECKSUM_ON_COPY
    seg->chksum = chksum;
    seg->chksum_swapped = chksum_swapped;
    seg->flags |= TF_SEG_DATA_CHECKSUMMED;
#endif /* TCP_CHECKSUM_ON_COPY */

    /* first segment of to-be-queued data? */
    if (queue == NULL) {
      queue = seg;
    } else {
      /* Attach the segment to the end of the queued segments */
      LWIP_ASSERT("prev_seg != NULL", prev_seg != NULL);
      prev_seg->next = seg;
    }
    /* remember last segment of to-be-queued data for next iteration */
    prev_seg = seg;

    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_TRACE, ("tcp_write: queueing %"U32_F":%"U32_F"\n",
                lwip_ntohl(seg->tcphdr->seqno),
                lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg)));

    pos += seglen;
  }

  /*
   * All three segmentation phases were successful. We can commit the
   * transaction.
   */
#if TCP_OVERSIZE_DBGCHECK
  if ((last_unsent != NULL) && (oversize_add != 0)) {
    last_unsent->oversize_left += oversize_add;
  }
#endif /* TCP_OVERSIZE_DBGCHECK */

  /*
   * Phase 1: If data has been added to the preallocated tail of
   * last_unsent, we update the length fields of the pbuf chain.
   */
#if TCP_OVERSIZE
  if ((last_unsent != NULL) && (oversize_used > 0)) {
    struct pbuf *p;
    /* Bump tot_len of whole chain, len of tail */
    for (p = last_unsent->p; p; p = p->next) {
      p->tot_len += oversize_used;
      if (p->next == NULL) {
        TCP_DATA_COPY((char *)p->payload + p->len, arg, oversize_used, last_unsent);
        p->len += oversize_used;
      }
    }
    last_unsent->len += oversize_used;
#if TCP_OVERSIZE_DBGCHECK
    LWIP_ASSERT("last_unsent->oversize_left >= oversize_used",
                last_unsent->oversize_left >= oversize_used);
    last_unsent->oversize_left -= oversize_used;
#endif /* TCP_OVERSIZE_DBGCHECK */
  }
  pcb->unsent_oversize = oversize;
#endif /* TCP_OVERSIZE */

  /*
   * Phase 2: concat_p can be concatenated onto last_unsent->p, unless we
   * determined that the last ROM pbuf can be extended to include the new data.
   */
  if (last_unsent != NULL) {
    if (concat_p != NULL) {
      LWIP_ASSERT("tcp_write: cannot concatenate when pcb->unsent is empty",
                  (last_unsent != NULL));
      pbuf_cat(last_unsent->p, concat_p);
      last_unsent->len += concat_p->tot_len;
    } else if (extendlen > 0) {
      struct pbuf *p;
      LWIP_ASSERT("tcp_write: extension of reference requires reference",
                  last_unsent != NULL && last_unsent->p != NULL);
      for (p = last_unsent->p; p->next != NULL; p = p->next) {
        p->tot_len += extendlen;
      }
      p->tot_len += extendlen;
      p->len += extendlen;
      last_unsent->len += extendlen;
    }
  }
#if TCP_CHECKSUM_ON_COPY
  if (concat_chksummed) {
    LWIP_ASSERT("tcp_write: concat checksum needs concatenated data",
                concat_p != NULL || extendlen > 0);
    /* if the concat checksum was swapped, swap it back */
    if (concat_chksum_swapped) {
      concat_chksum = SWAP_BYTES_IN_WORD(concat_chksum);
    }
    if (last_unsent != NULL) {
      tcp_seg_add_chksum(concat_chksum, concat_chksummed, &last_unsent->chksum,
                         &last_unsent->chksum_swapped);
      last_unsent->flags |= TF_SEG_DATA_CHECKSUMMED;
    }
  }
#endif /* TCP_CHECKSUM_ON_COPY */

  /*
   * Phase 3: Append queue to pcb->unsent. Queue may be NULL, but that
   * is harmless
   */
  if (last_unsent == NULL) {
    pcb->unsent = queue;
  } else {
    last_unsent->next = queue;
  }

  /*
   * Finally update the pcb state.
   */
  pcb->snd_lbb += len;
  pcb->snd_buf -= len;
  pcb->snd_queuelen = queuelen;

  LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: %"S16_F" (after enqueued)\n",
                               pcb->snd_queuelen));
  if (pcb->snd_queuelen != 0) {
    LWIP_ASSERT("tcp_write: valid queue length",
                pcb->unacked != NULL || pcb->unsent != NULL);
  }

  /* Set the PSH flag in the last segment that we enqueued. */
  if (seg != NULL && seg->tcphdr != NULL && ((apiflags & TCP_WRITE_FLAG_MORE) == 0)) {
    TCPH_SET_FLAG(seg->tcphdr, TCP_PSH);
  }

  return ERR_OK;
memerr:
  tcp_set_flags(pcb, TF_NAGLEMEMERR);
  TCP_STATS_INC(tcp.memerr);

  if (concat_p != NULL) {
    pbuf_free(concat_p);
  }
  if (queue != NULL) {
    tcp_segs_free(queue);
  }
  if (pcb->snd_queuelen != 0) {
    LWIP_ASSERT("tcp_write: valid queue length", pcb->unacked != NULL ||
                pcb->unsent != NULL);
  }
  LWIP_DEBUGF(TCP_QLEN_DEBUG | LWIP_DBG_STATE, ("tcp_write: %"TCPWNDSIZE_F" (with mem err)\n", pcb->snd_queuelen));
  return ERR_MEM;
}

/**
 * Split the segment at the head of the unsent queue. If the return value is
 * not ERR_OK, the existing head remains intact.
 *
 * The split is accomplished by creating a new TCP segment and pbuf
 * which holds the remainder payload after the split. The original
 * pbuf is trimmed to the new length. This allows splitting of read-only
 * pbufs.
 *
 * @param pcb the tcp_pcb for which to split the unsent head
 * @param split the amount of payload to remain in the head
 */
err_t
tcp_split_unsent_seg(struct tcp_pcb *pcb, u16_t split)
{
  struct tcp_seg *seg = NULL, *useg = NULL;
  struct pbuf *p = NULL;
  u8_t optlen;
  u8_t optflags;
  u8_t split_flags;
  u8_t remainder_flags;
  u16_t remainder;
  u16_t offset;
#if TCP_CHECKSUM_ON_COPY
  u16_t chksum = 0;
  u8_t chksum_swapped = 0;
  struct pbuf *q;
#endif /* TCP_CHECKSUM_ON_COPY */

  LWIP_ASSERT("tcp_split_unsent_seg: invalid pcb", pcb != NULL);

  useg = pcb->unsent;
  if (useg == NULL) {
    return ERR_MEM;
  }

  if (split == 0) {
    LWIP_ASSERT("Can't split segment into length 0", 0);
    return ERR_VAL;
  }

  if (useg->len <= split) {
    return ERR_OK;
  }

  LWIP_ASSERT("split <= mss", split <= pcb->mss);
  LWIP_ASSERT("useg->len > 0", useg->len > 0);

  /* We should check that we don't exceed TCP_SND_QUEUELEN but we need
   * to split this packet so we may actually exceed the max value by
   * one!
   */
  LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue: split_unsent_seg: %u\n", (unsigned int)pcb->snd_queuelen));

  optflags = useg->flags;
#if TCP_CHECKSUM_ON_COPY
  /* Remove since checksum is not stored until after tcp_create_segment() */
  optflags &= ~TF_SEG_DATA_CHECKSUMMED;
#endif /* TCP_CHECKSUM_ON_COPY */
  optlen = LWIP_TCP_OPT_LENGTH(optflags);
  remainder = useg->len - split;

  /* Create new pbuf for the remainder of the split */
  p = pbuf_alloc(PBUF_TRANSPORT, remainder + optlen, PBUF_RAM);
  if (p == NULL) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
                ("tcp_split_unsent_seg: could not allocate memory for pbuf remainder %u\n", remainder));
    goto memerr;
  }

  /* Offset into the original pbuf is past TCP/IP headers, options, and split amount */
  offset = useg->p->tot_len - useg->len + split;
  /* Copy remainder into new pbuf, headers and options will not be filled out */
  if (pbuf_copy_partial(useg->p, (u8_t *)p->payload + optlen, remainder, offset) != remainder) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
                ("tcp_split_unsent_seg: could not copy pbuf remainder %u\n", remainder));
    goto memerr;
  }
#if TCP_CHECKSUM_ON_COPY
  /* calculate the checksum on remainder data */
  tcp_seg_add_chksum(~inet_chksum((const u8_t *)p->payload + optlen, remainder), remainder,
                     &chksum, &chksum_swapped);
#endif /* TCP_CHECKSUM_ON_COPY */

  /* Options are created when calling tcp_output() */

  /* Migrate flags from original segment */
  split_flags = TCPH_FLAGS(useg->tcphdr);
  remainder_flags = 0; /* ACK added in tcp_output() */

  if (split_flags & TCP_PSH) {
    split_flags &= ~TCP_PSH;
    remainder_flags |= TCP_PSH;
  }
  if (split_flags & TCP_FIN) {
    split_flags &= ~TCP_FIN;
    remainder_flags |= TCP_FIN;
  }
  /* SYN should be left on split, RST should not be present with data */

  seg = tcp_create_segment(pcb, p, remainder_flags, lwip_ntohl(useg->tcphdr->seqno) + split, optflags);
  if (seg == NULL) {
    p = NULL; /* Freed by tcp_create_segment */
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
                ("tcp_split_unsent_seg: could not create new TCP segment\n"));
    goto memerr;
  }

#if TCP_CHECKSUM_ON_COPY
  seg->chksum = chksum;
  seg->chksum_swapped = chksum_swapped;
  seg->flags |= TF_SEG_DATA_CHECKSUMMED;
#endif /* TCP_CHECKSUM_ON_COPY */

  /* Remove this segment from the queue since trimming it may free pbufs */
  pcb->snd_queuelen -= pbuf_clen(useg->p);

  /* Trim the original pbuf into our split size. At this point our remainder segment must be setup
     successfully because we are modifying the original segment */
  pbuf_realloc(useg->p, useg->p->tot_len - remainder);
  useg->len -= remainder;
  TCPH_SET_FLAG(useg->tcphdr, split_flags);
#if TCP_OVERSIZE_DBGCHECK
  /* By trimming, realloc may have actually shrunk the pbuf, so clear oversize_left */
  useg->oversize_left = 0;
#endif /* TCP_OVERSIZE_DBGCHECK */

  /* Add back to the queue with new trimmed pbuf */
  pcb->snd_queuelen += pbuf_clen(useg->p);

#if TCP_CHECKSUM_ON_COPY
  /* The checksum on the split segment is now incorrect. We need to re-run it over the split */
  useg->chksum = 0;
  useg->chksum_swapped = 0;
  q = useg->p;
  offset = q->tot_len - useg->len; /* Offset due to exposed headers */

  /* Advance to the pbuf where the offset ends */
  while (q != NULL && offset > q->len) {
    offset -= q->len;
    q = q->next;
  }
  LWIP_ASSERT("Found start of payload pbuf", q != NULL);
  /* Checksum the first payload pbuf accounting for offset, then other pbufs are all payload */
  for (; q != NULL; offset = 0, q = q->next) {
    tcp_seg_add_chksum(~inet_chksum((const u8_t *)q->payload + offset, q->len - offset), q->len - offset,
                       &useg->chksum, &useg->chksum_swapped);
  }
#endif /* TCP_CHECKSUM_ON_COPY */

  /* Update number of segments on the queues. Note that length now may
   * exceed TCP_SND_QUEUELEN! We don't have to touch pcb->snd_buf
   * because the total amount of data is constant when packet is split */
  pcb->snd_queuelen += pbuf_clen(seg->p);

  /* Finally insert remainder into queue after split (which stays head) */
  seg->next = useg->next;
  useg->next = seg;

#if TCP_OVERSIZE
  /* If remainder is last segment on the unsent, ensure we clear the oversize amount
   * because the remainder is always sized to the exact remaining amount */
  if (seg->next == NULL) {
    pcb->unsent_oversize = 0;
  }
#endif /* TCP_OVERSIZE */

  return ERR_OK;
memerr:
  TCP_STATS_INC(tcp.memerr);

  LWIP_ASSERT("seg == NULL", seg == NULL);
  if (p != NULL) {
    pbuf_free(p);
  }

  return ERR_MEM;
}

/**
 * Called by tcp_close() to send a segment including FIN flag but not data.
 * This FIN may be added to an existing segment or a new, otherwise empty
 * segment is enqueued.
 *
 * @param pcb the tcp_pcb over which to send a segment
 * @return ERR_OK if sent, another err_t otherwise
 */
err_t
tcp_send_fin(struct tcp_pcb *pcb)
{
  LWIP_ASSERT("tcp_send_fin: invalid pcb", pcb != NULL);

  /* first, try to add the fin to the last unsent segment */
  if (pcb->unsent != NULL) {
    struct tcp_seg *last_unsent;
    for (last_unsent = pcb->unsent; last_unsent->next != NULL;
         last_unsent = last_unsent->next);

    if ((TCPH_FLAGS(last_unsent->tcphdr) & (TCP_SYN | TCP_FIN | TCP_RST)) == 0) {
      /* no SYN/FIN/RST flag in the header, we can add the FIN flag */
      TCPH_SET_FLAG(last_unsent->tcphdr, TCP_FIN);
      tcp_set_flags(pcb, TF_FIN);
      return ERR_OK;
    }
  }
  /* no data, no length, flags, copy=1, no optdata */
  return tcp_enqueue_flags(pcb, TCP_FIN);
}

/**
 * Enqueue SYN or FIN for transmission.
 *
 * Called by @ref tcp_connect, tcp_listen_input, and @ref tcp_close
 * (via @ref tcp_send_fin)
 *
 * @param pcb Protocol control block for the TCP connection.
 * @param flags TCP header flags to set in the outgoing segment.
 * @return ERR_OK if enqueued, another err_t on error
 */
err_t
tcp_enqueue_flags(struct tcp_pcb *pcb, u8_t flags)
{
  struct pbuf *p;
  struct tcp_seg *seg;
  u8_t optflags = 0;
  u8_t optlen = 0;

  LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue_flags: queuelen: %"U16_F"\n", (u16_t)pcb->snd_queuelen));

  LWIP_ASSERT("tcp_enqueue_flags: need either TCP_SYN or TCP_FIN in flags (programmer violates API)",
              (flags & (TCP_SYN | TCP_FIN)) != 0);
  LWIP_ASSERT("tcp_enqueue_flags: invalid pcb", pcb != NULL);

  /* check for configured max queuelen and possible overflow (FIN flag should always come through!) */
  if (((pcb->snd_queuelen >= pcb->snd_queuelen_max) || (pcb->snd_queuelen > TCP_SNDQUEUELEN_OVERFLOW)) &&
      ((flags & TCP_FIN) == 0)) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE,
                ("tcp_enqueue_flags: too long queue %"TCPWNDSIZE_F" (max %"TCPWNDSIZE_F")\n",
                 pcb->snd_queuelen, pcb->snd_queuelen_max));
    TCP_STATS_INC(tcp.memerr);
    pcb->flags |= TF_NAGLEMEMERR;
    return ERR_MEM;
  }

  /* No need to check pcb->snd_queuelen if only SYN or FIN are allowed! */

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  if (flags & TCP_FIN) {
    if (pcb->flags & TF_SACK) {
      optflags |= TF_SEG_OPTS_SACK_OPTIONS;
    }
  }
#endif

  /* Get options for this segment. This is a special case since this is the
     only place where a SYN can be sent. */
  if (flags & TCP_SYN) {
    optflags = TF_SEG_OPTS_MSS;
#if LWIP_WND_SCALE
    if ((pcb->state != SYN_RCVD) || (pcb->flags & TF_WND_SCALE)) {
      /* In a <SYN,ACK> (sent in state SYN_RCVD), the window scale option may only
         be sent if we received a window scale option from the remote host. */
      optflags |= TF_SEG_OPTS_WND_SCALE;
    }
#endif /* LWIP_WND_SCALE */

#if LWIP_SACK
    tcp_enqueue_flags_sack(pcb, &optflags);
#endif
  }
#if LWIP_TCP_TIMESTAMPS
  if ((pcb->flags & TF_TIMESTAMP) || ((flags & TCP_SYN) && (pcb->state != SYN_RCVD))) {
    /* Make sure the timestamp option is only included in data segments if we
       agreed about it with the remote host (and in active open SYN segments). */
    optflags |= TF_SEG_OPTS_TS;
  }
#endif /* LWIP_TCP_TIMESTAMPS */
  optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  if (optflags & TF_SEG_OPTS_SACK_OPTIONS) {
    u8_t cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
    optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(cnt));
  }
#endif

  /* Allocate pbuf with room for TCP header + options */
  if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RAM)) == NULL) {
    tcp_set_flags(pcb, TF_NAGLEMEMERR);
    TCP_STATS_INC(tcp.memerr);
    return ERR_MEM;
  }
  LWIP_ASSERT("tcp_enqueue_flags: check that first pbuf can hold optlen",
              (p->len >= optlen));

  /* Allocate memory for tcp_seg, and fill in fields. */
  if ((seg = tcp_create_segment(pcb, p, flags, pcb->snd_lbb, optflags)) == NULL) {
    tcp_set_flags(pcb, TF_NAGLEMEMERR);
    TCP_STATS_INC(tcp.memerr);
    return ERR_MEM;
  }
  LWIP_ASSERT("seg->tcphdr not aligned", ((mem_ptr_t)seg->tcphdr % LWIP_MIN(MEM_ALIGNMENT, 4)) == 0);
  LWIP_ASSERT("tcp_enqueue_flags: invalid segment length", seg->len == 0);

  LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_TRACE,
              ("tcp_enqueue_flags: queueing %"U32_F":%"U32_F" (0x%"X16_F")\n",
               lwip_ntohl(seg->tcphdr->seqno),
               lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg),
               (u16_t)flags));

  /* Now append seg to pcb->unsent queue */
  if (pcb->unsent == NULL) {
    pcb->unsent = seg;
  } else {
    struct tcp_seg *useg;
    for (useg = pcb->unsent; useg->next != NULL; useg = useg->next);
    useg->next = seg;
  }
#if TCP_OVERSIZE
  /* The new unsent tail has no space */
  pcb->unsent_oversize = 0;
#endif /* TCP_OVERSIZE */

  /* SYN and FIN bump the sequence number */
  if ((flags & TCP_SYN) || (flags & TCP_FIN)) {
    pcb->snd_lbb++;
    /* optlen does not influence snd_buf */
  }
  if (flags & TCP_FIN) {
    tcp_set_flags(pcb, TF_FIN);
  }

  /* update number of segments on the queues */
  pcb->snd_queuelen += pbuf_clen(seg->p);
  LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue_flags: %"S16_F" (after enqueued)\n", pcb->snd_queuelen));
  if (pcb->snd_queuelen != 0) {
    LWIP_ASSERT("tcp_enqueue_flags: invalid queue length",
                pcb->unacked != NULL || pcb->unsent != NULL);
  }

  return ERR_OK;
}

#if LWIP_TCP_TIMESTAMPS
/* Build a timestamp option (12 bytes long) at the specified options pointer.
 *
 * @param pcb tcp_pcb
 * @param opts option pointer where to store the timestamp option
 */
static void
tcp_build_timestamp_option(const struct tcp_pcb *pcb, u32_t *opts)
{
  LWIP_ASSERT("tcp_build_timestamp_option: invalid pcb", pcb != NULL);

  /* Pad with two NOP options to make everything nicely aligned */
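  /* 0x01 = NOP, 0x01 = NOP, 0x08 = option kind (timestamp), 0x0A = option length (10 bytes) */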
  opts[0] = PP_HTONL(0x0101080A);
  opts[1] = lwip_htonl(sys_now());
  opts[2] = lwip_htonl(pcb->ts_recent);
}
#endif

#if LWIP_WND_SCALE
/** Build a window scale option (3 bytes long) at the specified options pointer.
 *
 * @param opts option pointer where to store the window scale option
 */
static void
tcp_build_wnd_scale_option(u32_t *opts)
{
  LWIP_ASSERT("tcp_build_wnd_scale_option: invalid opts", opts != NULL);

  /* Pad with one NOP option to make everything nicely aligned */
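  /* 0x01 = NOP, 0x03 = option kind (window scale), 0x03 = option length, low byte = shift count */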
  opts[0] = PP_HTONL(0x01030300 | TCP_RCV_SCALE);
}
#endif

#if LWIP_TCP_TLP_SUPPORT
/*
 * This is the TLP probe timeout function as in draft-dukkipati-tcpm-tcp-loss-probe-01
 *
 * When PTO fires:
 * (a) If a new previously unsent segment exists:
 *     -> Transmit new segment.
 *     -> FlightSize += SMSS. cwnd remains unchanged.
 * (b) If no new segment exists:
 *     -> Retransmit the last segment.
 * (c) Increment statistics counter for loss probes.
 * (d) If conditions in (2) are satisfied:
 *     -> Reschedule next PTO.
 *     Else:
 *     -> Rearm RTO to fire at epoch 'now+RTO'.
 * Note: TLP was scheduled while the connection was in the open state, i.e. the sender
 * had so far received only in-sequence ACKs with no SACK blocks. By the time the PTO
 * timer actually fires (from tcp_slowtmr or tcp_fasttmr), a SACK block may have been
 * received from the peer in the meantime. lwIP nevertheless sends out the
 * tail-loss-probe segment here, in the hope that any additional dupack caused by the
 * probe segment can trigger early retransmit and give a chance of fast recovery.
 * @param pcb Protocol control block for the TCP connection to send data
 * @return None
 *
 */
void tcp_pto_fire(struct tcp_pcb *pcb)
{
  struct tcp_seg *unacked = NULL;
  u32_t wnd;
  err_t err;
  u32_t cwnd;

  if (!((pcb->tlp_rtx_out == 0) || (pcb->tlp_high_rxt == pcb->snd_nxt))) {
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: tlp_rtx_out %u, tlp_high_rxt %u, snd_nxt %u\n",
                                pcb->tlp_rtx_out, pcb->tlp_high_rxt, pcb->snd_nxt));
    return;
  }

  LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: timer fired\n"));

  wnd = LWIP_MIN(pcb->snd_wnd, pcb->cwnd);

  /* If there is an unsent segment that was held back only by cwnd, force one
     more segment out. If it was held back by snd_wnd instead, probe with an
     unacked segment, since sending unsent data may be declined by the peer
     when its buffer space is insufficient.
   */
#if defined(LWIP_DEBUG)
  if (pcb->unsent) {
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: unsent segment:%"U32_F" Available WND:%"TCPWNDSIZE_F""
                                "Required WND :%"TCPWNDSIZE_F"\n", lwip_htonl(pcb->unsent->tcphdr->seqno),
                                wnd, (lwip_ntohl(pcb->unsent->tcphdr->seqno) - pcb->lastack + pcb->unsent->len)));
  }
#endif

  /* The unsent segment may be used for a probe only once: the draft gives no
     clarification here, so probe with unsent data only once. After the first such
     probe the segment is moved to the unacked list, so unacked is used from then on.
   */
  if ((pcb->unsent != NULL) &&
      (lwip_ntohl(pcb->unsent->tcphdr->seqno) - pcb->lastack + pcb->unsent->len > wnd) &&
      (wnd == pcb->cwnd) && (pcb->tlp_rtx_out == 0)) {
    cwnd = pcb->cwnd;
    TCP_WND_INC(pcb->cwnd, pcb->mss);
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: Probing with unsent segment %"U32_F"\n",
                                lwip_htonl(pcb->unsent->tcphdr->seqno)));

    /* HACK, increase cwnd+1 and after send reduce it by one again */
    err = tcp_output(pcb);
    pcb->tlp_pto_cnt++;
    pcb->cwnd = cwnd; /* restore cwnd */
    if (err != ERR_OK) {
      return;
    }
  } else if (pcb->unacked != NULL) { /* if unacked is present */
    for (unacked = pcb->unacked; unacked->next != NULL; unacked = unacked->next) ;
    LWIP_DEBUGF(TCP_TLP_DEBUG, ("tcp_pto_fire: Probing with unacked segment %"U32_F"\n",
                                lwip_htonl(unacked->tcphdr->seqno)));
    err = tcp_output_segment(unacked, pcb, NULL);
    if (err != ERR_OK) {
      return;
    }
  } else {
    return;
  }

  /* tcp_output might have started scheduled pto timer, let it be... */
  if (pcb->tlp_rtx_out == 0) {
    pcb->tlp_high_rxt = pcb->snd_nxt;
  }
  pcb->tlp_rtx_out++;

  LWIP_DEBUGF(TCP_TLP_DEBUG,
              ("tcp_pto_fire: send TLP, pcb %p, tlp_rtx_out %u, tlp_high_rxt %u, tlp_pto_cnt %u, rtime %d\n",
               pcb, pcb->tlp_rtx_out, pcb->tlp_high_rxt, pcb->tlp_pto_cnt, pcb->rtime));
  tcp_tlp_schedule_probe(pcb, wnd);

  return;
}
#endif /* LWIP_TCP_TLP_SUPPORT */

/**
 * @ingroup tcp_raw
 * Find out what we can send and send it
 *
 * @param pcb Protocol control block for the TCP connection to send data
 * @return ERR_OK if data has been sent or nothing to send
 *         another err_t on error
 */
err_t
tcp_output(struct tcp_pcb *pcb)
{
  struct tcp_seg *seg, *useg;
  u32_t wnd, snd_nxt;
  u32_t wnd_ex;
  err_t err;
  struct netif *netif;
#if TCP_CWND_DEBUG
  s16_t i = 0;
#endif /* TCP_CWND_DEBUG */

  LWIP_ASSERT_CORE_LOCKED();

  LWIP_ASSERT("tcp_output: invalid pcb", pcb != NULL);
  /* pcb->state LISTEN not allowed here */
  LWIP_ASSERT("don't call tcp_output for listen-pcbs",
              pcb->state != LISTEN);

  /* First, check if we are invoked by the TCP input processing
     code. If so, we do not output anything. Instead, we rely on the
     input processing code to call us when input processing is done. */
  if (tcp_input_pcb == pcb) {
    return ERR_OK;
  }
1372
1373 #if DRIVER_STATUS_CHECK
1374 if (pcb->drv_status == DRV_NOT_READY) {
1375 LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: Cannot send data as netif driver is not ready\n"));
1376 /* Drop the packet here and return success */
1377 return ERR_OK;
1378 }
1379 #endif
1380
1381 /*
1382 [RFC 5681 Section 3.2]
1383 The fast retransmit and fast recovery algorithms are implemented
1384 together as follows:
1385 1. On the first and second duplicate ACKs received at a sender, a
1386 TCP SHOULD send a segment of previously unsent data per [RFC3042]
1387 provided that the receiver's advertised window allows, the total
1388 FlightSize would remain less than or equal to cwnd plus 2*SMSS,
1389 and that new data is available for transmission. Further, the
1390 TCP sender MUST NOT change cwnd to reflect these two segments
1391 [RFC3042].
1392 */
1393 wnd_ex = pcb->cwnd;
1394 TCP_WND_INC(wnd_ex, ((pcb->dupacks > 0 && pcb->dupacks <= 2) ?
1395 (tcpwnd_size_t)(pcb->dupacks * pcb->mss) : (tcpwnd_size_t)0));
1396 wnd = LWIP_MIN(pcb->snd_wnd, wnd_ex);

  seg = pcb->unsent;

  if (seg == NULL) {
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: nothing to send (%p)\n",
                                   (void *)pcb->unsent));
    LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F
                                 ", cwnd %"TCPWNDSIZE_F", wnd %"U32_F
                                 ", seg == NULL, ack %"U32_F"\n",
                                 pcb->snd_wnd, pcb->cwnd, wnd, pcb->lastack));

    /* If the TF_ACK_NOW flag is set and the ->unsent queue is empty, construct
     * an empty ACK segment and send it. */
    if (pcb->flags & TF_ACK_NOW) {
      return tcp_send_empty_ack(pcb);
    }
    /* nothing to send: shortcut out of here */
    goto output_done;
  } else {
    LWIP_DEBUGF(TCP_CWND_DEBUG,
                ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F
                 ", effwnd %"U32_F", seq %"U32_F", ack %"U32_F"\n",
                 pcb->snd_wnd, pcb->cwnd, wnd,
                 lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len,
                 lwip_ntohl(seg->tcphdr->seqno), pcb->lastack));
  }

  netif = tcp_route(pcb, &pcb->local_ip, &pcb->remote_ip);
  if (netif == NULL) {
    return ERR_RTE;
  }

  /* If we don't have a local IP address, we get one from netif */
  if (ip_addr_isany(&pcb->local_ip)) {
    const ip_addr_t *local_ip = ip_netif_get_local_ip(netif, &pcb->remote_ip);
    if (local_ip == NULL) {
      return ERR_RTE;
    }
    ip_addr_copy(pcb->local_ip, *local_ip);
  }

  /* Handle the current segment not fitting within the window */
  if (lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len > wnd) {
    /* Check if we need to start the persist timer when the next unsent segment
     * does not fit within the remaining send window and the RTO timer is not
     * running (we have no in-flight data). A traditional approach would fill
     * the remaining window with part of the unsent segment (which will engage
     * zero-window probing upon reception of the zero window update from the
     * receiver). This ensures the subsequent window update is reliably
     * received. With the goal of being lightweight, we avoid splitting the
     * unsent segment and treat the window as already zero.
     */
    if (wnd > 0 && wnd == pcb->snd_wnd && pcb->unacked == NULL && pcb->persist_backoff == 0) {
      pcb->persist_cnt = 0;
      pcb->persist_backoff = 1;
      pcb->persist_probe = 0;
    }
    /* We need an ACK, but can't send data now, so send an empty ACK */
    if (pcb->flags & TF_ACK_NOW) {
      return tcp_send_empty_ack(pcb);
    }
    goto output_done;
  }
  /* Stop persist timer, above conditions are not active */
  pcb->persist_backoff = 0;

  /* useg should point to last segment on unacked queue */
  useg = pcb->unacked;
  if (useg != NULL) {
    for (; useg->next != NULL; useg = useg->next);
  }

  /*
   [RFC 5681] 4.1. Restarting Idle Connections
   [PENDING - Not yet implemented]
   A known problem with the TCP congestion control algorithms described
   above is that they allow a potentially inappropriate burst of traffic
   to be transmitted after TCP has been idle for a relatively long
   period of time. After an idle period, TCP cannot use the ACK clock
   to strobe new segments into the network, as all the ACKs have drained
   from the network. Therefore, as specified above, TCP can potentially
   send a cwnd-size line-rate burst into the network after an idle
   period. In addition, changing network conditions may have rendered
   TCP's notion of the available end-to-end network capacity between two
   endpoints, as estimated by cwnd, inaccurate during the course of a
   long idle period.

   [Jac88] recommends that a TCP use slow start to restart transmission
   after a relatively long idle period. Slow start serves to restart
   the ACK clock, just as it does at the beginning of a transfer. This
   mechanism has been widely deployed in the following manner. When TCP
   has not received a segment for more than one retransmission timeout,
   cwnd is reduced to the value of the restart window (RW) before
   transmission begins.

   For the purposes of this standard, we define RW = min(IW,cwnd).

   Using the last time a segment was received to determine whether or
   not to decrease cwnd can fail to deflate cwnd in the common case of
   persistent HTTP connections [HTH98]. In this case, a Web server
   receives a request before transmitting data to the Web client. The
   reception of the request makes the test for an idle connection fail,
   and allows the TCP to begin transmission with a possibly
   inappropriately large cwnd.

   Therefore, a TCP SHOULD set cwnd to no more than RW before beginning
   transmission if the TCP has not sent data in an interval exceeding
   the retransmission timeout.
  */
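
  /* A minimal sketch of what the pending RW-based restart quoted above could
   * look like at this point. This is NOT implemented here; tcp_iw_of() is a
   * hypothetical helper returning the initial window IW, and the idle test
   * assumes pcb->tmr tracks the last activity in tcp_ticks units:
   *
   *   if (pcb->unacked == NULL &&
   *       (u32_t)(tcp_ticks - pcb->tmr) > (u32_t)pcb->rto) {  // idle > RTO
   *     pcb->cwnd = LWIP_MIN(tcp_iw_of(pcb), pcb->cwnd);      // RW = min(IW, cwnd)
   *   }
   */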

  /* data available and window allows it to be sent? */
  while (seg != NULL &&
#if DRIVER_STATUS_CHECK
         pcb->drv_status == DRV_READY &&
#endif /* DRIVER_STATUS_CHECK */
         lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
    LWIP_ASSERT("RST not expected here!",
                (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0);
    /* Stop sending if the Nagle algorithm would prevent it.
     * Don't stop:
     * - if tcp_write had a memory error before (prevent delayed ACK timeout) or
     * - if FIN was already enqueued for this PCB (SYN is always alone in a segment -
     *   either seg->next != NULL or pcb->unacked == NULL;
     *   RST is not sent using tcp_write/tcp_output).
     */
    if ((tcp_do_output_nagle(pcb) == 0) &&
        ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) {
      break;
    }
#if TCP_CWND_DEBUG
    LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F", effwnd %"U32_F", seq %"U32_F", ack %"U32_F", i %"S16_F"\n",
                                 pcb->snd_wnd, pcb->cwnd, wnd,
                                 lwip_ntohl(seg->tcphdr->seqno) + seg->len -
                                 pcb->lastack,
                                 lwip_ntohl(seg->tcphdr->seqno), pcb->lastack, i));
    ++i;
#endif /* TCP_CWND_DEBUG */

    if (pcb->state != SYN_SENT) {
      TCPH_SET_FLAG(seg->tcphdr, TCP_ACK);
    }

#if TCP_OVERSIZE_DBGCHECK
    seg->oversize_left = 0;
#endif /* TCP_OVERSIZE_DBGCHECK */
    err = tcp_output_segment(seg, pcb, netif);
    if (err != ERR_OK) {
      /* segment could not be sent, for whatever reason */
      tcp_set_flags(pcb, TF_NAGLEMEMERR);
      return err;
    }
    pcb->unsent = seg->next;
    if (pcb->state != SYN_SENT) {
      tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
    }
    snd_nxt = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
    if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) {
      pcb->snd_nxt = snd_nxt;
      if (TCP_TCPLEN(seg) < pcb->mss) {
        pcb->snd_sml = snd_nxt;
      }
#if LWIP_SACK
      if (pcb->flags & TF_SACK) {
        pcb->high_data = pcb->snd_nxt - 1;
      }
#endif
    }
    /* put segment on unacknowledged list if length > 0 */
    if (TCP_TCPLEN(seg) > 0) {
      seg->next = NULL;
      /* unacked list is empty? */
      if (pcb->unacked == NULL) {
        pcb->unacked = seg;
        useg = seg;
        /* unacked list is not empty? */
      } else {
        /* In the case of fast retransmit, the packet should not go to the tail
         * of the unacked queue, but rather somewhere before it. We need to check for
         * this case. -STJ Jul 27, 2004 */
        if (useg != NULL) {
          if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) {
            /* add segment to before tail of unacked list, keeping the list sorted */
            struct tcp_seg **cur_seg = &(pcb->unacked);
            while (*cur_seg &&
                   TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
              cur_seg = &((*cur_seg)->next);
            }
            seg->next = (*cur_seg);
            (*cur_seg) = seg;
          } else {
            /* add segment to tail of unacked list */
            useg->next = seg;
            useg = useg->next;
          }
        }
      }
      /* do not queue empty segments on the unacked list */
    } else {
      tcp_seg_free(seg);
    }
    seg = pcb->unsent;

#if LWIP_TCP_TLP_SUPPORT
    /* stop an in-progress PTO when new data has been transmitted */
    if (pcb->tlp_time_stamp) {
      LWIP_TCP_TLP_CLEAR_VARS(pcb);
      if ((pcb->unacked != NULL) && (pcb->rtime == -1)) {
        pcb->rtime = 0;
      }
    }

    /* draft-dukkipati-tcpm-tcp-loss-probe-01: schedule the PTO after transmission of new data in the Open state */
    /* lwIP deviates from this draft in that the PTO may also be scheduled in the Disorder state */
    tcp_tlp_schedule_probe(pcb, wnd);
#endif /* LWIP_TCP_TLP_SUPPORT */
  }
output_done:
#if TCP_OVERSIZE
  if (pcb->unsent == NULL) {
    /* last unsent has been removed, reset unsent_oversize */
    pcb->unsent_oversize = 0;
  }
#endif /* TCP_OVERSIZE */

  tcp_clear_flags(pcb, TF_NAGLEMEMERR);
  return ERR_OK;
}
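
/* Usage sketch (not part of this file): from the application's perspective,
 * tcp_output() is normally driven through the raw API. A minimal, hedged
 * example of queueing data and then flushing it, assuming a connected pcb
 * obtained via tcp_new()/tcp_connect():
 *
 *   err_t send_greeting(struct tcp_pcb *pcb)
 *   {
 *     const char msg[] = "hello";
 *     err_t err = tcp_write(pcb, msg, sizeof(msg) - 1, TCP_WRITE_FLAG_COPY);
 *     if (err == ERR_OK) {
 *       err = tcp_output(pcb);  // push the queued segment(s) out now
 *     }
 *     return err;
 *   }
 */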

#if LWIP_SACK_DATA_SEG_PIGGYBACK

/*
 * Called by tcp_output_segment.
 *
 * @param seg the tcp_seg to send
 * @param pcb the tcp_pcb for the TCP connection used to send the segment
 */
u8_t
tcp_check_and_alloc_sack_options(struct tcp_seg *seg, const struct tcp_pcb *pcb)
{
  struct pbuf *p = NULL;
  struct pbuf *p2 = NULL;
  u8_t optlen;
  u8_t cnt, orig_cnt;
  u8_t flags = 0;
  struct tcp_hdr *new_tcphdr = NULL;
  u16_t offset;
  u16_t mss_local = (u16_t)LWIP_MIN(pcb->mss, pcb->snd_wnd_max >> 1);

  u16_t alloc_len;
  u8_t sack_optlen, orig_sack_optlen;

  /* LWIP_TCP_OPT_LENGTH may evaluate to zero, depending on the flags */
  optlen = (u8_t)(LWIP_TCP_OPT_LENGTH(seg->flags));

  /* SACK options length = header length - 20 (base header) - length of the other options */
  orig_sack_optlen = (u8_t)((TCPH_HDRLEN_BYTES(seg->tcphdr) - TCP_HLEN) - (LWIP_TCP_OPT_LENGTH(seg->flags)));
  /* 4 leading bytes (padding + kind + length), then 8 bytes per SACK block:
     e.g. 2 blocks -> 4 + 2 * 8 = 20 option bytes */
  orig_cnt = (u8_t)((orig_sack_optlen >= 4) ? (orig_sack_optlen - 4) / 8 : 0);

  cnt = tcp_get_sack_block_count_for_send(pcb, optlen);
  sack_optlen = (u8_t)(LWIP_TCP_SACK_OPT_LENGTH(cnt));
  if ((seg->len + optlen + sack_optlen) > mss_local) {
    sack_optlen = orig_sack_optlen;
  }

  if (sack_optlen == orig_sack_optlen) {
    LWIP_DEBUGF(TCP_SACK_DEBUG, ("SACK count same. Rebuilding seg not required\n"));
    return orig_cnt;
  }

  /* 4 leading bytes (padding + kind + length), then 8 bytes per SACK block */
  cnt = (u8_t)((sack_optlen >= 4) ? (sack_optlen - 4) / 8 : 0);
  LWIP_DEBUGF(TCP_SACK_DEBUG, ("Calculated SACK Count: %d SACK Optlen: %d\n", cnt, sack_optlen));

  optlen = (u8_t)(optlen + sack_optlen);
  if (seg->tcphdr < (struct tcp_hdr *)seg->p->payload) {
    /* Sanity check before finding offset below */
    return orig_cnt;
  }

  offset = (u16_t)((u8_t *)seg->tcphdr - (u8_t *)seg->p->payload);
  alloc_len = (u16_t)(optlen + (seg->p->len - (offset + TCPH_HDRLEN_BYTES(seg->tcphdr))));

  p = pbuf_alloc(PBUF_TRANSPORT, alloc_len, PBUF_RAM);
  if (p == NULL) {
    return orig_cnt;
  }

  if (!pbuf_header(p, TCP_HLEN)) {
    new_tcphdr = (struct tcp_hdr *)p->payload;
    (void)memcpy_s(new_tcphdr, TCP_HLEN, seg->tcphdr, TCP_HLEN);

    alloc_len = (u16_t)(alloc_len - optlen); /* options will be filled below */

    (void)memcpy_s((char *)new_tcphdr + TCP_HLEN + optlen, alloc_len,
                   (char *)seg->tcphdr + TCPH_HDRLEN_BYTES(seg->tcphdr), alloc_len);

    flags = TCPH_FLAGS(new_tcphdr);
    TCPH_HDRLEN_FLAGS_SET(new_tcphdr, (5 + optlen / 4), flags);
    p2 = seg->p->next;
    while (p2 != NULL) {
      pbuf_ref(p2);
      pbuf_cat(p, p2);
      p2 = p2->next;
    }
    (void)pbuf_free(seg->p);
    seg->p = p;
    seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
  } else {
    LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_check_and_alloc_sack_options: pbuf_header failed\n"));
    (void)pbuf_free(p);
    return orig_cnt;
  }
  return cnt;
}
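
/* Worked example of the option-length arithmetic above (illustrative numbers
 * only): with a 44-byte header (20-byte base header, 12 bytes of other options
 * such as timestamps, 12 bytes of SACK options), orig_sack_optlen =
 * 44 - 20 - 12 = 12, so orig_cnt = (12 - 4) / 8 = 1 block. Growing to cnt = 2
 * blocks gives sack_optlen = 4 + 2 * 8 = 20, and a rebuilt header of
 * 20 + 12 + 20 = 52 bytes. */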
#endif

/** Check if a segment's pbufs are used by someone else than TCP.
 * This can happen on retransmission if the pbuf of this segment is still
 * referenced by the netif driver due to deferred transmission.
 * This is the case (only!) if someone down the TX call path called
 * pbuf_ref() on one of the pbufs!
 *
 * @arg seg the tcp segment to check
 * @return 1 if ref != 1, 0 if ref == 1
 */
static int
tcp_output_segment_busy(const struct tcp_seg *seg)
{
  LWIP_ASSERT("tcp_output_segment_busy: invalid seg", seg != NULL);

  /* We only need to check the first pbuf here:
     If a pbuf is queued for transmission, a driver calls pbuf_ref(),
     which only changes the ref count of the first pbuf */
  if (atomic_read(&seg->p->ref) != 1) {
    /* other reference found */
    return 1;
  }
  /* no other references found */
  return 0;
}
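
/* Illustration (not part of this file): a driver that defers transmission
 * typically keeps the pbuf alive across its TX-done interrupt. A hedged sketch
 * of such a netif linkoutput function, with my_enqueue_for_dma() standing in
 * as a hypothetical placeholder:
 *
 *   static err_t my_linkoutput(struct netif *netif, struct pbuf *p)
 *   {
 *     pbuf_ref(p);            // hold the pbuf; TCP may want to retransmit it
 *     my_enqueue_for_dma(p);  // hardware sends it later...
 *     return ERR_OK;          // ...and the TX-done handler calls pbuf_free(p)
 *   }
 *
 * While that extra reference exists, tcp_output_segment_busy() returns 1 and
 * the stack leaves the segment alone.
 */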

/**
 * Called by tcp_output() to actually send a TCP segment over IP.
 *
 * @param seg the tcp_seg to send
 * @param pcb the tcp_pcb for the TCP connection used to send the segment
 * @param netif the netif used to send the segment
 */
err_t
tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif)
{
  err_t err;
  u16_t len;
  u32_t *opts;
#if TCP_CHECKSUM_ON_COPY
  int seg_chksum_was_swapped = 0;
#endif
#if LWIP_SACK_DATA_SEG_PIGGYBACK
  u8_t cnt = 0;
#endif

  LWIP_ASSERT("tcp_output_segment: invalid seg", seg != NULL);
  LWIP_ASSERT("tcp_output_segment: invalid pcb", pcb != NULL);

  if (netif == NULL) {
    netif = ip_route_pcb(&pcb->remote_ip, (struct ip_pcb *)pcb);
    if (netif == NULL) {
      /* Don't even try to send a packet if we have no route, since that will fail. */
      return ERR_NETUNREACH;
    }
  }

#if DRIVER_STATUS_CHECK
  /* Driver interface is not yet ready. */
  if (!(netif->flags & NETIF_FLAG_DRIVER_RDY)) {
    /* Update driver status if not ready */
    pcb->drv_status = DRV_NOT_READY;
    return ERR_RTE;
  }
#endif

  if (tcp_output_segment_busy(seg)) {
    /* This should not happen: rexmit functions should have checked this.
       However, since this function modifies p->len, we must not continue in this case. */
    LWIP_DEBUGF(TCP_RTO_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_output_segment: segment busy\n"));
    return ERR_OK;
  }

  /* The TCP header has already been constructed, but the ackno and
     wnd fields remain. */
  seg->tcphdr->ackno = lwip_htonl(pcb->rcv_nxt);

  /* advertise our receive window size in this TCP segment */
#if LWIP_WND_SCALE
  if (seg->flags & TF_SEG_OPTS_WND_SCALE) {
    /* The Window field in a SYN segment itself (the only type where we send
       the window scale option) is never scaled. */
    seg->tcphdr->wnd = lwip_htons(TCPWND_MIN16(pcb->rcv_ann_wnd));
  } else
#endif /* LWIP_WND_SCALE */
  {
    seg->tcphdr->wnd = lwip_htons(TCPWND_MIN16(RCV_WND_SCALE(pcb, pcb->rcv_ann_wnd)));
  }

  pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd;

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  if (seg->flags & TF_SEG_OPTS_SACK_OPTIONS) {
    cnt = tcp_check_and_alloc_sack_options(seg, pcb);
    LWIP_DEBUGF(TCP_SACK_DEBUG, ("Sending SACK Count: %d\n", cnt));
  }
#endif

  /* Add any requested options. NB MSS option is only set on SYN
     packets, so ignore it here */
  /* cast through void* to get rid of alignment warnings */
  opts = (u32_t *)(void *)(seg->tcphdr + 1);
  if (seg->flags & TF_SEG_OPTS_MSS) {
#if LWIP_TCP_MAXSEG
    u16_t mss = ((pcb->usr_mss == 0) ? (TCP_MSS) : (pcb->usr_mss));
#else
    u16_t mss = TCP_MSS;
#endif /* LWIP_TCP_MAXSEG */
#if TCP_CALCULATE_EFF_SEND_MSS
    mss = tcp_eff_send_mss_netif(mss, netif, &pcb->remote_ip);
#endif /* TCP_CALCULATE_EFF_SEND_MSS */
    *opts = TCP_BUILD_MSS_OPTION(mss);
    opts += 1;
  }
#if LWIP_TCP_TIMESTAMPS
  pcb->ts_lastacksent = pcb->rcv_nxt;

  if (seg->flags & TF_SEG_OPTS_TS) {
    tcp_build_timestamp_option(pcb, opts);
    opts += 3;
  }
#endif
#if LWIP_WND_SCALE
  if (seg->flags & TF_SEG_OPTS_WND_SCALE) {
    tcp_build_wnd_scale_option(opts);
    opts += 1;
  }
#endif
#if LWIP_SACK
  if (seg->flags & TF_SEG_OPTS_SACK_PERMITTED) {
    /* send the SACK-permitted option along with data */
    tcp_build_sack_permitted_option(opts);
    opts += 1;
  }

#if LWIP_SACK_DATA_SEG_PIGGYBACK
  if (seg->flags & TF_SEG_OPTS_SACK_OPTIONS) {
    tcp_build_sack_option(pcb, cnt, opts);
    /* cnt may be zero here (when SACK is not enabled), making the option length zero */
    opts += (LWIP_TCP_SACK_OPT_LENGTH(cnt)) / sizeof(u32_t);
  }
#endif

#if LWIP_SACK_PERF_OPT
  /* update packet sequence number */
  if (pcb->flags & TF_SACK) {
    seg->pkt_trans_seq_cntr = pcb->pkt_seq_num;
    pcb->pkt_seq_num++;
  }
#endif
#endif /* LWIP_SACK */
  /* Set retransmission timer running if it is not currently enabled.
     This must be set before checking the route. */
  if (pcb->rtime < 0) {
    pcb->rtime = 0;
  }

  /* Don't take an RTT sample if packets are being retransmitted (Karn's algorithm) */
  if ((pcb->rttest == 0) && seg->len && TCP_SEQ_LEQ(pcb->snd_nxt, lwip_ntohl(seg->tcphdr->seqno))) {
    pcb->rttest = sys_now();
    if (pcb->rttest == 0) {
      /* "rttest == 0" means no RTT sample is in progress, so avoid that value */
      pcb->rttest = 1;
    }
    pcb->rtseq = lwip_ntohl(seg->tcphdr->seqno);
    LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_output_segment: rtseq %"U32_F", rttest %"U32_F"\n", pcb->rtseq, pcb->rttest));
  }
  LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output_segment: %"U32_F":%"U32_F"\n",
                                 lwip_htonl(seg->tcphdr->seqno), lwip_htonl(seg->tcphdr->seqno) +
                                 seg->len));

  len = (u16_t)((u8_t *)seg->tcphdr - (u8_t *)seg->p->payload);
  if (len == 0) {
    /* Exclude retransmitted segments from this count. */
    MIB2_STATS_INC(mib2.tcpoutsegs);
  }

  seg->p->len -= len;
  seg->p->tot_len -= len;
  pcb->last_payload_len = seg->len;
  seg->p->payload = seg->tcphdr;

  seg->tcphdr->chksum = 0;

#ifdef LWIP_HOOK_TCP_OUT_ADD_TCPOPTS
  opts = LWIP_HOOK_TCP_OUT_ADD_TCPOPTS(seg->p, seg->tcphdr, pcb, opts);
#endif
  LWIP_ASSERT("options not filled", (u8_t *)opts == ((u8_t *)(seg->tcphdr + 1)) + LWIP_TCP_OPT_LENGTH_SEGMENT(seg->flags, pcb));

#if CHECKSUM_GEN_TCP
  IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
#if TCP_CHECKSUM_ON_COPY
    u32_t acc;
#if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
    u16_t chksum_slow = ip_chksum_pseudo(seg->p, IP_PROTO_TCP,
                                         seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
#endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */
    if ((seg->flags & TF_SEG_DATA_CHECKSUMMED) == 0) {
      LWIP_ASSERT("data included but not checksummed",
                  seg->p->tot_len == TCPH_HDRLEN_BYTES(seg->tcphdr));
    }

    /* rebuild TCP header checksum (TCP header changes for retransmissions!) */
    acc = ip_chksum_pseudo_partial(seg->p, IP_PROTO_TCP,
                                   seg->p->tot_len, TCPH_HDRLEN_BYTES(seg->tcphdr), &pcb->local_ip, &pcb->remote_ip);
    /* add payload checksum */
    if (seg->chksum_swapped) {
      seg_chksum_was_swapped = 1;
      seg->chksum = SWAP_BYTES_IN_WORD(seg->chksum);
      seg->chksum_swapped = 0;
    }
    acc = (u16_t)~acc + seg->chksum;
    seg->tcphdr->chksum = (u16_t)~FOLD_U32T(acc);
#if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
    if (chksum_slow != seg->tcphdr->chksum) {
      TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL(
        ("tcp_output_segment: calculated checksum is %"X16_F" instead of %"X16_F"\n",
         seg->tcphdr->chksum, chksum_slow));
      seg->tcphdr->chksum = chksum_slow;
    }
#endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */
#else /* TCP_CHECKSUM_ON_COPY */
    seg->tcphdr->chksum = ip_chksum_pseudo(seg->p, IP_PROTO_TCP,
                                           seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
#endif /* TCP_CHECKSUM_ON_COPY */
  }
#endif /* CHECKSUM_GEN_TCP */
  TCP_STATS_INC(tcp.xmit);

#if LWIP_SO_DONTROUTE
  if (ip_get_option((struct ip_pcb *)pcb, SOF_DONTROUTE)) {
    seg->p->flags |= PBUF_FLAG_IS_LINK_ONLY;
  }
#endif /* LWIP_SO_DONTROUTE */

  NETIF_SET_HINTS(netif, &(pcb->netif_hints));

#if LWIP_SO_PRIORITY
  seg->p->priority = pcb->priority;
#endif /* LWIP_SO_PRIORITY */

  err = ip_output_if(seg->p, &pcb->local_ip, &pcb->remote_ip, pcb->ttl,
                     pcb->tos, IP_PROTO_TCP, netif);
  NETIF_RESET_HINTS(netif);

#if TCP_CHECKSUM_ON_COPY
  if (seg_chksum_was_swapped) {
    /* if data is added to this segment later, chksum needs to be swapped,
       so restore this now */
    seg->chksum = SWAP_BYTES_IN_WORD(seg->chksum);
    seg->chksum_swapped = 1;
  }
#endif

  return err;
}

/**
 * Requeue all unacked segments for retransmission
 *
 * Called by tcp_slowtmr() for slow retransmission.
 *
 * @param pcb the tcp_pcb for which to re-enqueue all unacked segments
 */
err_t
tcp_rexmit_rto_prepare(struct tcp_pcb *pcb)
{
  struct tcp_seg *seg;

  LWIP_ASSERT("tcp_rexmit_rto_prepare: invalid pcb", pcb != NULL);

  if (pcb->unacked == NULL) {
    return ERR_VAL;
  }

  /* Move all unacked segments to the head of the unsent queue.
     However, give up if any of the unsent pbufs are still referenced by the
     netif driver due to deferred transmission. No point loading the link further
     if it is struggling to flush its buffered writes. */
  for (seg = pcb->unacked; seg->next != NULL; seg = seg->next) {
    if (tcp_output_segment_busy(seg)) {
      LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n"));
      return ERR_VAL;
    }

#if LWIP_SACK
#if DRIVER_STATUS_CHECK
    /* Clear the loss recovery seg_type */
    seg->seg_type = SEG_TYPE_NONE;
#endif /* DRIVER_STATUS_CHECK */
    if (seg->state & TF_SEG_SACKED) {
      seg->state = (u32_t)(seg->state & (~TF_SEG_SACKED));
    }
#endif /* LWIP_SACK */
  }
  if (tcp_output_segment_busy(seg)) {
    LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n"));
    return ERR_VAL;
  }

#if LWIP_SACK
#if DRIVER_STATUS_CHECK
  /* Clear the loss recovery seg_type */
  seg->seg_type = SEG_TYPE_NONE;
#endif /* DRIVER_STATUS_CHECK */
  if (seg->state & TF_SEG_SACKED) {
    seg->state = (u32_t)(seg->state & (~TF_SEG_SACKED));
  }
#endif /* LWIP_SACK */
  /* concatenate unsent queue after unacked queue */
  seg->next = pcb->unsent;
#if TCP_OVERSIZE_DBGCHECK
  /* if last unsent changed, we need to update unsent_oversize */
  if (pcb->unsent == NULL) {
    pcb->unsent_oversize = seg->oversize_left;
  }
#endif /* TCP_OVERSIZE_DBGCHECK */
  /* unsent queue is the concatenated queue (of unacked, unsent) */
  pcb->unsent = pcb->unacked;
  /* unacked queue is now empty */
  pcb->unacked = NULL;

  /* Mark RTO in-progress */
  tcp_set_flags(pcb, TF_RTO);
  /* Record the next byte following retransmit */
  pcb->rto_end = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
  /* Don't take any RTT measurements after retransmitting. */
  pcb->rttest = 0;

#if LWIP_SACK
  /* As per section 5.1 in RFC 6675:
     if an RTO occurs, pipe-based loss recovery must stop, and the recovery
     point is captured from the highest data sent. A new recovery phase must
     not be initiated until the highest ACK is at or beyond that recovery
     point. */
  if (pcb->flags & TF_SACK) {
    pcb->recovery_point = pcb->high_data;
    pcb->flags = (tcpflags_t)(pcb->flags & (~TF_IN_SACK_FRLR));
    pcb->flags |= TF_IN_SACK_RTO;
    LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_rexmit_rto: In retransmit timeout\n"));
    LWIP_DEBUGF(TCP_SACK_DEBUG, ("tcp_receive : Pipe is %"U32_F
                                 " RecoveryPoint is %"U32_F" HighSacked is %"U32_F" HighData is %"U32_F
                                 " HighRxt is %"U32_F" RescueRxt is %"U32_F"\n", pcb->pipe, pcb->recovery_point,
                                 pcb->high_sacked, pcb->high_data, pcb->high_rxt, pcb->rescue_rxt));
#if LWIP_SACK_PERF_OPT
    LWIP_DEBUGF(TCP_SACK_DEBUG, ("Freeing the fast-retransmitted segments\n"));
    tcp_fr_segs_free(pcb->fr_segs);
    pcb->fr_segs = NULL;
    pcb->last_frseg = NULL;
#if LWIP_SACK_CWND_OPT
    pcb->recover_cwnd = 0;
    pcb->recover_ssthresh = 0;
#endif /* LWIP_SACK_CWND_OPT */
#endif /* LWIP_SACK_PERF_OPT */
    pcb->sacked = 0;
#if LWIP_FACK_THRESHOLD_BASED_FR
    /* make sure that FACK reflects the forward-most data held by the receiver at any point in time */
    pcb->fack = pcb->lastack;
#endif /* LWIP_FACK_THRESHOLD_BASED_FR */
  }
#endif /* LWIP_SACK */
  return ERR_OK;
}

/**
 * Requeue all unacked segments for retransmission
 *
 * Called by tcp_slowtmr() for slow retransmission.
 *
 * @param pcb the tcp_pcb for which to re-enqueue all unacked segments
 */
void
tcp_rexmit_rto_commit(struct tcp_pcb *pcb)
{
  LWIP_ASSERT("tcp_rexmit_rto_commit: invalid pcb", pcb != NULL);

  /* increment number of retransmissions */
  if (pcb->nrtx < 0xFF) {
    ++pcb->nrtx;
  }
  /* Do the actual retransmission */
  tcp_output(pcb);
}

/**
 * Requeue all unacked segments for retransmission
 *
 * Called by tcp_process() only; tcp_slowtmr() needs to do some things between
 * "prepare" and "commit".
 *
 * @param pcb the tcp_pcb for which to re-enqueue all unacked segments
 */
void
tcp_rexmit_rto(struct tcp_pcb *pcb)
{
  LWIP_ASSERT("tcp_rexmit_rto: invalid pcb", pcb != NULL);

  if (tcp_rexmit_rto_prepare(pcb) == ERR_OK) {
    tcp_rexmit_rto_commit(pcb);
  }
}
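
/* Illustration (not from this file): the prepare/commit split lets a caller
 * adjust congestion state between the two steps. A hedged sketch of what a
 * slow-timer RTO path could look like, with the cwnd/ssthresh math shown only
 * as an example policy:
 *
 *   if (tcp_rexmit_rto_prepare(pcb) == ERR_OK) {
 *     pcb->ssthresh = LWIP_MAX((tcpwnd_size_t)(pcb->mss << 1),
 *                              LWIP_MIN(pcb->cwnd, pcb->snd_wnd) / 2);
 *     pcb->cwnd = pcb->mss;   // restart from one segment (slow start)
 *     tcp_rexmit_rto_commit(pcb);
 *   }
 */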

/**
 * Requeue the first unacked segment for retransmission
 *
 * Called by tcp_receive() for fast retransmit.
 *
 * @param pcb the tcp_pcb for which to retransmit the first unacked segment
 */
err_t
tcp_rexmit(struct tcp_pcb *pcb)
{
  struct tcp_seg *seg;
  struct tcp_seg **cur_seg;

  LWIP_ASSERT("tcp_rexmit: invalid pcb", pcb != NULL);

  if (pcb->unacked == NULL) {
    return ERR_VAL;
  }

  seg = pcb->unacked;

  /* Give up if the segment is still referenced by the netif driver
     due to deferred transmission. */
  if (tcp_output_segment_busy(seg)) {
    LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit busy\n"));
    return ERR_VAL;
  }

  /* Move the first unacked segment to the unsent queue.
     Keep the unsent queue sorted. */
  pcb->unacked = seg->next;

  cur_seg = &(pcb->unsent);
  while (*cur_seg &&
         TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
    cur_seg = &((*cur_seg)->next);
  }
  seg->next = *cur_seg;
  *cur_seg = seg;
#if TCP_OVERSIZE
  if (seg->next == NULL) {
    /* the retransmitted segment is last in unsent, so reset unsent_oversize */
    pcb->unsent_oversize = 0;
  }
#endif /* TCP_OVERSIZE */

  /* Don't take any rtt measurements after retransmitting. */
  pcb->rttest = 0;

  /* Do the actual retransmission. */
  MIB2_STATS_INC(mib2.tcpretranssegs);
  /* No need to call tcp_output: we are always called from tcp_input()
     and thus tcp_output directly returns. */
  return ERR_OK;
}

/**
 * Handle retransmission after three dupacks received
 *
 * @param pcb the tcp_pcb for which to retransmit the first unacked segment
 */
void
tcp_rexmit_fast(struct tcp_pcb *pcb)
{
  /*
    [RFC 6582] Section 3.2. Specification
    2) Three duplicate ACKs:
       When the third duplicate ACK is received, the TCP sender first
       checks the value of recover to see if the Cumulative
       Acknowledgment field covers more than recover. If so, the value
       of recover is incremented to the value of the highest sequence
       number transmitted by the TCP so far. The TCP then enters fast
       retransmit (step 2 of Section 3.2 of [RFC5681]). If not, the TCP
       does not enter fast retransmit and does not reset ssthresh.

    Step 2 above specifies a check that the Cumulative Acknowledgment
    field covers more than recover. Because the acknowledgment field
    contains the sequence number that the sender next expects to receive,
    the acknowledgment "ack_number" covers more than recover when

        ack_number - 1 > recover;

    i.e., at least one byte more of data is acknowledged beyond the
    highest byte that was outstanding when fast retransmit was last
    entered.
  */
  if (pcb->unacked != NULL && !(pcb->flags & TF_INFR) &&
      TCP_SEQ_GEQ(pcb->lastack, pcb->fast_recovery_point)) {
    /* This is fast retransmit. Retransmit the first unacked segment. */
    LWIP_DEBUGF(TCP_FR_DEBUG,
                ("tcp_receive: dupacks %"U16_F" (%"U32_F
                 "), fast retransmit %"U32_F"\n",
                 (u16_t)pcb->dupacks, pcb->lastack,
                 lwip_ntohl(pcb->unacked->tcphdr->seqno)));
    if (tcp_rexmit(pcb) == ERR_OK) {
      /*
        [RFC 5681] Section 3.2. Fast Retransmit/Fast Recovery

        2. When the third duplicate ACK is received, a TCP MUST set ssthresh
           to no more than the value given in equation (4). When [RFC3042]
           is in use, additional data sent in limited transmit MUST NOT be
           included in this calculation.

             ssthresh = max (FlightSize / 2, 2 * SMSS)            (4)

        where, as discussed above, FlightSize is the amount of outstanding
        data in the network.

        Violation: lwIP keeps ssthresh at a minimum of (8 * SMSS).
        Violation: FlightSize is not actually calculated; the send window
        size is used instead.
      */
      /* Set ssthresh to half of the minimum of the current
       * cwnd and the advertised window */
      pcb->ssthresh = (tcpwnd_size_t)(LWIP_MIN(pcb->cwnd, pcb->snd_wnd) / ((tcpwnd_size_t)2));
      pcb->ssthresh = (tcpwnd_size_t)(LWIP_MAX(pcb->ssthresh, (tcpwnd_size_t)(pcb->mss << 3))); /* 8 * SMSS */

      /*
        3. The lost segment starting at SND.UNA MUST be retransmitted and
           cwnd set to ssthresh plus 3*SMSS. This artificially "inflates"
           the congestion window by the number of segments (three) that have
           left the network and which the receiver has buffered.
      */
      pcb->cwnd = pcb->ssthresh;
      TCP_WND_INC(pcb->cwnd, (tcpwnd_size_t)(3 * pcb->mss));

      tcp_set_flags(pcb, TF_INFR);
      tcp_set_flags(pcb, TF_INFR_FPACK);

      /* record recovery point if NewReno is enabled */
      pcb->fast_recovery_point = pcb->snd_nxt;

      /* Reset the retransmission timer to prevent immediate rto retransmissions */
      pcb->rtime = 0;
    }
  }
}
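
/* Worked example of the fast-retransmit arithmetic above (numbers are
 * illustrative only): with mss = 1460, cwnd = 8760 and snd_wnd = 65535,
 * ssthresh = max(min(8760, 65535) / 2, 8 * 1460) = max(4380, 11680) = 11680,
 * and cwnd = ssthresh + 3 * mss = 11680 + 4380 = 16060. */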

static struct pbuf *
tcp_output_alloc_header_common(u32_t ackno, u16_t optlen, u16_t datalen,
                               u32_t seqno_be /* already in network byte order */,
                               u16_t src_port, u16_t dst_port, u8_t flags, u16_t wnd)
{
  struct tcp_hdr *tcphdr;
  struct pbuf *p;

  p = pbuf_alloc(PBUF_IP, TCP_HLEN + optlen + datalen, PBUF_RAM);
  if (p != NULL) {
    LWIP_ASSERT("check that first pbuf can hold struct tcp_hdr",
                (p->len >= TCP_HLEN + optlen));
    tcphdr = (struct tcp_hdr *)p->payload;
    tcphdr->src = lwip_htons(src_port);
    tcphdr->dest = lwip_htons(dst_port);
    tcphdr->seqno = seqno_be;
    tcphdr->ackno = lwip_htonl(ackno);
    TCPH_HDRLEN_FLAGS_SET(tcphdr, (5 + optlen / 4), flags);
    tcphdr->wnd = lwip_htons(wnd);
    tcphdr->chksum = 0;
    tcphdr->urgp = 0;
  }
  return p;
}

/** Allocate a pbuf and create a tcphdr at p->payload, used for output
 * functions other than the default tcp_output -> tcp_output_segment
 * (e.g. tcp_send_empty_ack, etc.)
 *
 * @param pcb tcp pcb for which to send a packet (used to initialize tcp_hdr)
 * @param optlen length of header-options
 * @param datalen length of tcp data to reserve in pbuf
 * @param seqno_be seqno in network byte order (big-endian)
 * @return pbuf with p->payload being the tcp_hdr
 */
static struct pbuf *
tcp_output_alloc_header(struct tcp_pcb *pcb, u16_t optlen, u16_t datalen,
                        u32_t seqno_be /* already in network byte order */)
{
  struct pbuf *p;

  LWIP_ASSERT("tcp_output_alloc_header: invalid pcb", pcb != NULL);

  p = tcp_output_alloc_header_common(pcb->rcv_nxt, optlen, datalen,
                                     seqno_be, pcb->local_port, pcb->remote_port, TCP_ACK,
                                     TCPWND_MIN16(RCV_WND_SCALE(pcb, pcb->rcv_ann_wnd)));
  if (p != NULL) {
    /* If we're sending a packet, update the announced right window edge */
    pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd;
  }
  return p;
}

/* Fill in options for control segments */
static void
tcp_output_fill_options(struct tcp_pcb *pcb, struct pbuf *p, u8_t optflags, u8_t num_sacks)
{
  struct tcp_hdr *tcphdr;
  u32_t *opts;

  LWIP_ASSERT("tcp_output_fill_options: invalid pbuf", p != NULL);

  tcphdr = (struct tcp_hdr *)p->payload;
  opts = (u32_t *)(void *)(tcphdr + 1);

  /* NB. MSS and window scale options are only sent on SYNs, so ignore them here */

#if LWIP_TCP_TIMESTAMPS
  if (optflags & TF_SEG_OPTS_TS) {
    tcp_build_timestamp_option(pcb, opts);
    opts += 3;
  }
#endif

#if LWIP_SACK
  if (optflags & TF_SEG_OPTS_SACK) {
    tcp_build_sack_option(pcb, num_sacks, opts);
    /* num_sacks may be zero, in which case the option length is zero as well */
    opts += (LWIP_TCP_SACK_OPT_LENGTH(num_sacks)) / sizeof(u32_t); /* number of 32-bit words */
  }
#endif

  LWIP_UNUSED_ARG(num_sacks);

#ifdef LWIP_HOOK_TCP_OUT_ADD_TCPOPTS
  opts = LWIP_HOOK_TCP_OUT_ADD_TCPOPTS(p, tcphdr, pcb, opts);
#endif

  LWIP_UNUSED_ARG(pcb);
  LWIP_UNUSED_ARG(optflags); /* for LWIP_NOASSERT */
  LWIP_UNUSED_ARG(opts); /* for LWIP_NOASSERT */
}

/** Output a control segment pbuf to IP.
 *
 * Called from tcp_rst, tcp_send_empty_ack, tcp_keepalive and tcp_zero_window_probe,
 * this function combines selecting a netif for transmission, generating the tcp
 * header checksum and calling ip_output_if while handling netif hints and stats.
 */
static err_t
tcp_output_control_segment(const struct tcp_pcb *pcb, struct pbuf *p,
                           const ip_addr_t *src, const ip_addr_t *dst)
{
  err_t err;
  struct netif *netif;

  LWIP_ASSERT("tcp_output_control_segment: invalid pbuf", p != NULL);

  netif = tcp_route(pcb, src, dst);
  if (netif == NULL) {
    err = ERR_RTE;
  } else {
    u8_t ttl, tos;
#if CHECKSUM_GEN_TCP
    IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
      struct tcp_hdr *tcphdr = (struct tcp_hdr *)p->payload;
      tcphdr->chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len,
                                        src, dst);
    }
#endif
    if (pcb != NULL) {
      NETIF_SET_HINTS(netif, LWIP_CONST_CAST(struct netif_hint*, &(pcb->netif_hints)));
      ttl = pcb->ttl;
      tos = pcb->tos;
#if LWIP_SO_DONTROUTE
      if (ip_get_option((struct ip_pcb *)pcb, SOF_DONTROUTE)) {
        p->flags |= PBUF_FLAG_IS_LINK_ONLY;
      }
#endif /* LWIP_SO_DONTROUTE */
    } else {
      /* Send output with hardcoded TTL/HL since we have no access to the pcb */
      ttl = TCP_TTL;
      tos = 0;
    }
    TCP_STATS_INC(tcp.xmit);

#if LWIP_SO_PRIORITY
    p->priority = LWIP_PKT_PRIORITY_MIN;
    if (pcb != NULL) {
      p->priority = pcb->priority;
    }
#endif /* LWIP_SO_PRIORITY */

    err = ip_output_if(p, src, dst, ttl, tos, IP_PROTO_TCP, netif);
    NETIF_RESET_HINTS(netif);
  }
  pbuf_free(p);
  return err;
}

/**
 * Send a TCP RESET packet (empty segment with RST flag set) either to
 * abort a connection or to show that there is no matching local connection
 * for a received segment.
 *
 * Called by tcp_abort() (to abort a local connection), tcp_input() (if no
 * matching local pcb was found), tcp_listen_input() (if incoming segment
 * has ACK flag set) and tcp_process() (received segment in the wrong state)
 *
 * Since a RST segment is in most cases not sent for an active connection,
 * tcp_rst() takes explicit arguments that most other segment output
 * functions would take from a tcp_pcb.
 *
 * @param pcb TCP pcb (may be NULL if no pcb is available)
 * @param seqno the sequence number to use for the outgoing segment
 * @param ackno the acknowledge number to use for the outgoing segment
 * @param local_ip the local IP address to send the segment from
 * @param remote_ip the remote IP address to send the segment to
 * @param local_port the local TCP port to send the segment from
 * @param remote_port the remote TCP port to send the segment to
 */
void
tcp_rst(struct tcp_pcb *pcb, u32_t seqno, u32_t ackno,
        const ip_addr_t *local_ip, const ip_addr_t *remote_ip,
        u16_t local_port, u16_t remote_port)
{
  struct pbuf *p;
  u16_t wnd;
  u8_t optlen;

  LWIP_ASSERT("tcp_rst: invalid local_ip", local_ip != NULL);
  LWIP_ASSERT("tcp_rst: invalid remote_ip", remote_ip != NULL);

  optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);

#if LWIP_WND_SCALE
  wnd = PP_HTONS(((TCP_WND >> TCP_RCV_SCALE) & 0xFFFF));
#else
  wnd = PP_HTONS(TCP_WND);
#endif

  p = tcp_output_alloc_header_common(ackno, optlen, 0, lwip_htonl(seqno), local_port,
                                     remote_port, TCP_RST | TCP_ACK, wnd);
  if (p == NULL) {
    LWIP_DEBUGF(TCP_DEBUG, ("tcp_rst: could not allocate memory for pbuf\n"));
    return;
  }
  tcp_output_fill_options(pcb, p, 0, 0);

  MIB2_STATS_INC(mib2.tcpoutrsts);

  tcp_output_control_segment(pcb, p, local_ip, remote_ip);
  LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_rst: seqno %"U32_F" ackno %"U32_F".\n", seqno, ackno));
}
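
/* Illustration (not from this file): a typical caller answering an unexpected
 * incoming segment takes seqno/ackno from the offending header, as tcp_input()
 * does. A hedged sketch, with tcphdr and tcplen standing in for the parsed
 * segment and its fields assumed to be already in host byte order:
 *
 *   tcp_rst(NULL, lwip_ntohl(tcphdr->ackno),
 *           lwip_ntohl(tcphdr->seqno) + tcplen,
 *           ip_current_dest_addr(), ip_current_src_addr(),
 *           tcphdr->dest, tcphdr->src);
 */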

/**
 * Send an ACK without data.
 *
 * @param pcb Protocol control block for the TCP connection to send the ACK
 */
err_t
tcp_send_empty_ack(struct tcp_pcb *pcb)
{
  err_t err;
  struct pbuf *p;
  u8_t optlen, optflags = 0;
  u8_t num_sacks = 0;

  LWIP_ASSERT("tcp_send_empty_ack: invalid pcb", pcb != NULL);

#if DRIVER_STATUS_CHECK
  if (pcb->drv_status == DRV_NOT_READY) {
    return ERR_RTE;
  }
#endif

#if LWIP_TCP_TIMESTAMPS
  if (pcb->flags & TF_TIMESTAMP) {
    optflags = TF_SEG_OPTS_TS;
  }
#endif
  optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
#if LWIP_SACK
  if (pcb->flags & TF_SACK) {
    num_sacks = tcp_get_sack_block_count_for_send(pcb, optlen);
    optlen = (u8_t)(optlen + LWIP_TCP_SACK_OPT_LENGTH(num_sacks));
    optflags |= TF_SEG_OPTS_SACK;
  }
#endif

  p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt));
  if (p == NULL) {
    /* let tcp_fasttmr retry sending this ACK */
    tcp_set_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
    LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: (ACK) could not allocate pbuf\n"));
    return ERR_BUF;
  }
  tcp_output_fill_options(pcb, p, optflags, num_sacks);

#if LWIP_TCP_TIMESTAMPS
  pcb->ts_lastacksent = pcb->rcv_nxt;
#endif

  LWIP_DEBUGF(TCP_OUTPUT_DEBUG,
              ("tcp_output: sending ACK for %"U32_F"\n", pcb->rcv_nxt));
  err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);
  if (err != ERR_OK) {
    /* let tcp_fasttmr retry sending this ACK */
    tcp_set_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
  } else {
    /* remove ACK flags from the PCB, as we sent an empty ACK now */
    tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
  }

  return err;
}

/**
 * Send keepalive packets to keep a connection active although
 * no data is sent over it.
 *
 * Called by tcp_slowtmr()
 *
 * @param pcb the tcp_pcb for which to send a keepalive packet
 */
err_t
tcp_keepalive(struct tcp_pcb *pcb)
{
  err_t err;
  struct pbuf *p;
  u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);

  LWIP_ASSERT("tcp_keepalive: invalid pcb", pcb != NULL);

  LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: sending KEEPALIVE probe to "));
  ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip);
  LWIP_DEBUGF(TCP_DEBUG, ("\n"));

  LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: tcp_ticks %"U32_F" pcb->tmr %"U32_F" pcb->keep_cnt_sent %"U16_F"\n",
                          tcp_ticks, pcb->tmr, (u16_t)pcb->keep_cnt_sent));

  p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt - 1));
  if (p == NULL) {
    LWIP_DEBUGF(TCP_DEBUG,
                ("tcp_keepalive: could not allocate memory for pbuf\n"));
    return ERR_MEM;
  }
  tcp_output_fill_options(pcb, p, 0, 0);
  err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);

  LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: seqno %"U32_F" ackno %"U32_F" err %d.\n",
                          pcb->snd_nxt - 1, pcb->rcv_nxt, (int)err));
  return err;
}
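
/* Usage note (not from this file): keepalive probing is enabled per pcb via
 * the SOF_KEEPALIVE socket option. A hedged sketch of application-side setup,
 * assuming LWIP_TCP_KEEPALIVE is enabled so the interval fields exist:
 *
 *   ip_set_option((struct ip_pcb *)pcb, SOF_KEEPALIVE);
 *   pcb->keep_idle = 60000;   // ms of idle time before the first probe
 *   pcb->keep_intvl = 10000;  // ms between probes
 *   pcb->keep_cnt = 4;        // probes sent before the connection is dropped
 */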

/**
 * Send persist timer zero-window probes to keep a connection active
 * when a window update is lost.
 *
 * Called by tcp_slowtmr()
 *
 * @param pcb the tcp_pcb for which to send a zero-window probe packet
 */
err_t
tcp_zero_window_probe(struct tcp_pcb *pcb)
{
  err_t err;
  struct pbuf *p;
  struct tcp_hdr *tcphdr;
  struct tcp_seg *seg;
  u8_t is_fin;
  u32_t seq;
  u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);

  LWIP_ASSERT("tcp_zero_window_probe: invalid pcb", pcb != NULL);

  LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: sending ZERO WINDOW probe to "));
  ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip);
  LWIP_DEBUGF(TCP_DEBUG, ("\n"));

  LWIP_DEBUGF(TCP_DEBUG,
              ("tcp_zero_window_probe: tcp_ticks %"U32_F
               " pcb->tmr %"U32_F" pcb->keep_cnt_sent %"U16_F"\n",
               tcp_ticks, pcb->tmr, (u16_t)pcb->keep_cnt_sent));

  /* Only consider the unsent queue; the persist timer should be off when
     there is data in flight */
  seg = pcb->unsent;
  if (seg == NULL) {
    /* Not expected, persist timer should be off when the send buffer is empty */
    return ERR_OK;
  }

  /* Increment the probe count. NOTE: we record the probe even if it fails to
     actually transmit due to an error. This ensures that memory exhaustion or
     a routing problem doesn't leave a zero-window pcb as an indefinite zombie.
     The RTO mechanism has similar behavior, see pcb->nrtx */
  if (pcb->persist_probe < 0xFF) {
    ++pcb->persist_probe;
  }

  is_fin = ((TCPH_FLAGS(seg->tcphdr) & TCP_FIN) != 0) && (seg->len == 0);
  seq = is_fin ? seg->tcphdr->seqno : lwip_htonl(pcb->snd_nxt - 1);
  /* The zero-window probe carries a 0-byte payload instead of the traditional
     1-byte payload. This is not compliant with RFC 793 section 3.7, but it
     matches Linux behaviour and eases future code maintenance. */
  /* we want to send one seqno: either FIN or data (no options) */
  p = tcp_output_alloc_header(pcb, optlen, 0, seq);
  if (p == NULL) {
    LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: no memory for pbuf\n"));
    return ERR_MEM;
  }
  tcphdr = (struct tcp_hdr *)p->payload;

  if (is_fin) {
    /* FIN segment, no data */
    TCPH_FLAGS_SET(tcphdr, TCP_ACK | TCP_FIN);
  }

  tcp_output_fill_options(pcb, p, 0, 0);

  err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);

  LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: seqno %"U32_F
                          " ackno %"U32_F" err %d.\n",
                          pcb->snd_nxt - 1, pcb->rcv_nxt, (int)err));
  return err;
}

#if DRIVER_STATUS_CHECK
#if LWIP_SACK
static void tcp_search_and_flush_sack_on_wake_queue(struct tcp_pcb *pcb, u32_t sack_type)
{
  struct tcp_seg *sack_seg = pcb->unacked;
  while ((sack_seg != NULL) && (TCP_SEQ_LT(ntohl(sack_seg->tcphdr->seqno), pcb->high_sacked))) {
    if (sack_seg->seg_type == sack_type) {
      LWIP_DEBUGF(DRV_STS_DEBUG, ("Retransmitting unsacked segment seq num(%u) of type %d\n",
                                  ntohl(sack_seg->tcphdr->seqno), sack_type));
      /* flush packet and clear state */
      (void)tcp_output_segment(sack_seg, pcb, NULL);
      sack_seg->seg_type = SEG_TYPE_NONE;
    }
    sack_seg = sack_seg->next;
  }

  return;
}
#endif

void tcp_flush_pcb_on_wake_queue(struct tcp_pcb *pcb, u8_t status)
{
  /* update driver status for that PCB */
  pcb->drv_status = status;

  LWIP_DEBUGF(DRV_STS_DEBUG, ("Flushing PCB (Local Port: %u) on Wake Queue\n", pcb->local_port));
  ip_addr_debug_print_val(DRV_STS_DEBUG, pcb->local_ip);

  if (pcb->flags & TF_RST_ON_DRV_WAKE) {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Sending Reset on Wake Queue\n"));
    tcp_rst(pcb, pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip,
            pcb->local_port, pcb->remote_port);
    return;
  }

  /* Send an empty ACK if there are no data packets */
  if ((pcb->unsent == NULL) && (pcb->flags & (TF_ACK_NOW | TF_ACK_DELAY))) {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Sending Empty ACK on Wake Queue\n"));
    (void)tcp_send_empty_ack(pcb);
  }

  /* If data piggyback is disabled, send the SACK options in an empty ACK */
#if !LWIP_SACK_DATA_SEG_PIGGYBACK
  if (pcb->ooseq != NULL) {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Sending Empty ACK on Wake Queue\n"));
    (void)tcp_send_empty_ack(pcb);
  }
#endif

#if LWIP_SACK
  /* flush all unsacked packets as per the loss recovery algorithm */
  if (pcb->unacked) {
    u32_t next_type = FAST_RETX_SEG;
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Going to flush UnackedQueue\n"));
    /* search the segment types one by one */
    do {
      tcp_search_and_flush_sack_on_wake_queue(pcb, next_type);
      switch (next_type) {
        case FAST_RETX_SEG:
          next_type = UNSACKED_AND_LOST_SEG;
          break;
        case UNSACKED_AND_LOST_SEG:
          next_type = UNSENT_SEG;
          /* flush tcp queue */
          (void)tcp_output(pcb);
          /* fall-through */
        case UNSENT_SEG:
          next_type = UNSACKED_SEG;
          break;
        case UNSACKED_SEG:
          next_type = RESCUE_RX_SEG;
          break;
        case RESCUE_RX_SEG:
          next_type = SEG_TYPE_NONE;
          /* fall-through */
        default:
          break;
      }

      /* break loop if seg_type is none */
    } while (next_type != SEG_TYPE_NONE);
    LWIP_DEBUGF(DRV_STS_DEBUG, ("UnackedQueue flushed\n"));
  } else {
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Going to flush TCP unsent\n"));
    /* flush tcp queue */
    (void)tcp_output(pcb);
    LWIP_DEBUGF(DRV_STS_DEBUG, ("Flushed TCP unsent\n"));
  }
#else
  LWIP_DEBUGF(DRV_STS_DEBUG, ("Going to flush TCP unsent\n"));
  /* Flush unsent segments */
  (void)tcp_output(pcb);
  LWIP_DEBUGF(DRV_STS_DEBUG, ("Flushed TCP unsent\n"));
#endif

  return;
}
#endif

#endif /* LWIP_TCP */