• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From af8ac36acb103aa27b498dafa0ae8ba4332faac8 Mon Sep 17 00:00:00 2001
2From: wu-changsheng <wuchangsheng2@huawei.com>
3Date: Sat, 3 Dec 2022 21:38:09 +0800
4Subject: [PATCH] add-tso
5
6---
7 src/core/ipv4/etharp.c   |  17 +++-
8 src/core/ipv4/ip4.c      |  10 ++-
9 src/core/tcp.c           |   6 ++
10 src/core/tcp_out.c       | 178 +++++++++++++++++++++++++++++++++++++--
11 src/include/dpdk_cksum.h |   2 +-
12 src/include/lwip/pbuf.h  |   8 +-
13 src/include/lwipopts.h   |   4 +
14 7 files changed, 211 insertions(+), 14 deletions(-)
15
16diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c
17index effb7db..f1903e4 100644
18--- a/src/core/ipv4/etharp.c
19+++ b/src/core/ipv4/etharp.c
20@@ -482,6 +482,13 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et
21     struct pbuf *p = arp_table[i].q;
22     arp_table[i].q = NULL;
23 #endif /* ARP_QUEUEING */
24+#if USE_LIBOS
25+    struct pbuf *tmp = p->next;
26+    while (tmp != NULL) {
27+       tmp->ref--;
28+       tmp = tmp->next;
29+    }
30+#endif
31     /* send the queued IP packet */
32     ethernet_output(netif, p, (struct eth_addr *)(netif->hwaddr), ethaddr, ETHTYPE_IP);
33     /* free the queued IP packet */
34@@ -1027,7 +1034,15 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q)
35     } else {
36       /* referencing the old pbuf is enough */
37       p = q;
38-      pbuf_ref(p);
39+#if USE_LIBOS
40+      struct pbuf *tmp = p;
41+      while (tmp != NULL) {
42+        pbuf_ref(tmp);
43+        tmp = tmp->next;
44+      }
45+#else
46+       pbuf_ref(p);
47+#endif
48     }
49     /* packet could be taken over? */
50     if (p != NULL) {
51diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c
52index 1334cdc..d823491 100644
53--- a/src/core/ipv4/ip4.c
54+++ b/src/core/ipv4/ip4.c
55@@ -1034,9 +1034,15 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d
56 #endif /* ENABLE_LOOPBACK */
57 #if IP_FRAG
58   /* don't fragment if interface has mtu set to 0 [loopif] */
59-  if (netif->mtu && (p->tot_len > netif->mtu)) {
60-    return ip4_frag(p, netif, dest);
61+#if USE_LIBOS
62+  if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) {
63+#endif
64+    if (netif->mtu && (p->tot_len > netif->mtu)) {
65+      return ip4_frag(p, netif, dest);
66+    }
67+#if USE_LIBOS
68   }
69+#endif
70 #endif /* IP_FRAG */
71
72   LWIP_DEBUGF(IP_DEBUG, ("ip4_output_if: call netif->output()\n"));
73diff --git a/src/core/tcp.c b/src/core/tcp.c
74index 7c18408..51ada38 100644
75--- a/src/core/tcp.c
76+++ b/src/core/tcp.c
77@@ -1756,7 +1756,9 @@ tcp_seg_free(struct tcp_seg *seg)
78       seg->p = NULL;
79 #endif /* TCP_DEBUG */
80     }
81+#if !USE_LIBOS
82     memp_free(MEMP_TCP_SEG, seg);
83+#endif
84   }
85 }
86
87@@ -1792,10 +1794,14 @@ tcp_seg_copy(struct tcp_seg *seg)
88
89   LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL);
90
91+#if USE_LIBOS
92+  cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom));
93+#else
94   cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG);
95   if (cseg == NULL) {
96     return NULL;
97   }
98+#endif
99   SMEMCPY((u8_t *)cseg, (const u8_t *)seg, sizeof(struct tcp_seg));
100   pbuf_ref(cseg->p);
101   return cseg;
102diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c
103index 2834ba3..ee6f40b 100644
104--- a/src/core/tcp_out.c
105+++ b/src/core/tcp_out.c
106@@ -161,6 +161,40 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst)
107  * The TCP header is filled in except ackno and wnd.
108  * p is freed on failure.
109  */
110+#if USE_LIBOS
111+void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags,
112+  u32_t seqno, u8_t optflags)
113+{
114+  u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
115+
116+  seg->flags = optflags;
117+  seg->next = NULL;
118+  seg->p = p;
119+  seg->len = p->tot_len - optlen;
120+
121+  /* build TCP header */
122+  pbuf_add_header(p, TCP_HLEN);
123+  seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
124+  seg->tcphdr->src = lwip_htons(pcb->local_port);
125+  seg->tcphdr->dest = lwip_htons(pcb->remote_port);
126+  seg->tcphdr->seqno = lwip_htonl(seqno);
127+
128+  TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (TCP_HLEN + optlen) / 4, hdrflags);
129+  seg->tcphdr->urgp = 0;
130+}
131+
132+static struct tcp_seg *
133+tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
134+{
135+  struct tcp_seg *seg;
136+
137+  seg = (struct tcp_seg *)((uint8_t *)p + sizeof(struct pbuf_custom));
138+
139+  tcp_init_segment(seg, pcb, p, hdrflags, seqno, optflags);
140+
141+  return seg;
142+}
143+#else
144 static struct tcp_seg *
145 tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
146 {
147@@ -210,6 +244,7 @@ tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32
148   seg->tcphdr->urgp = 0;
149   return seg;
150 }
151+#endif
152
153 /**
154  * Allocate a PBUF_RAM pbuf, perhaps with extra space at the end.
155@@ -1272,6 +1307,60 @@ tcp_build_wnd_scale_option(u32_t *opts)
156 }
157 #endif
158
159+#if USE_LIBOS
160+static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg)
161+{
162+  if (TCP_TCPLEN(seg) > 0) {
163+    seg->next = NULL;
164+    if (useg == NULL) {
165+      pcb->unacked = seg;
166+      useg = seg;
167+    } else {
168+      if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) {
169+        /* insert segment before the tail of the unacked list, keeping the list sorted */
170+        struct tcp_seg **cur_seg = &(pcb->unacked);
171+        while (*cur_seg &&
172+              TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
173+          cur_seg = &((*cur_seg)->next );
174+        }
175+        seg->next = (*cur_seg);
176+        (*cur_seg) = seg;
177+      } else {
178+        /* add segment to tail of unacked list */
179+        useg->next = seg;
180+        useg = seg;
181+      }
182+    }
183+  } else {
184+    tcp_seg_free(seg);
185+  }
186+
187+  return useg;
188+}
189+static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt)
190+{
191+  if (pcb->state != SYN_SENT) {
192+    TCPH_SET_FLAG(seg->tcphdr, TCP_ACK);
193+  }
194+
195+  err_t err = tcp_output_segment(seg, pcb, netif);
196+  if (err != ERR_OK) {
197+    /* segment could not be sent, for whatever reason */
198+    tcp_set_flags(pcb, TF_NAGLEMEMERR);
199+    return err;
200+  }
201+
202+  if (pcb->state != SYN_SENT) {
203+    tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
204+  }
205+
206+  if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) {
207+      pcb->snd_nxt = snd_nxt;
208+  }
209+
210+  return ERR_OK;
211+}
212+#endif
213 /**
214  * @ingroup tcp_raw
215  * Find out what we can send and send it
216@@ -1376,16 +1465,88 @@ tcp_output(struct tcp_pcb *pcb)
217     for (; useg->next != NULL; useg = useg->next);
218   }
219   /* data available and window allows it to be sent? */
220+
221 #if USE_LIBOS
222-  /* avoid send cose too much time, limit send pkts num max 10 */
223-  uint16_t send_pkt = 0;
224-  while (seg != NULL && send_pkt < 10 &&
225-         lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
226-    send_pkt++;
227-#else
228+  if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) {
229+    while(seg) {
230+      /**
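231+       * 1) Walk the unsent queue, find all segs waiting to be sent, and chain their pbufs together.
232+       * 2) Build a new seg, call tcp_output_segment, then release the new seg.
233+       * 3) On success, update snd_nxt, the unacked queue and the unsent queue.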
234+       */
235+      struct tcp_seg *start_seg = seg;
236+      struct pbuf *first_pbuf = NULL;
237+      struct pbuf *pre_pbuf = NULL;
238+      u8_t pbuf_chain_len = 0;
239+      u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno);
240+      while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) {
241+        u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno);
242+        if (seg_seqno - pcb->lastack + seg->len > wnd) {
243+          if (first_pbuf)
244+            break;
245+          else
246+            goto output_done;
247+        }
248+
249+        if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) {
250+          if (first_pbuf)
251+            break;
252+          else
253+            goto output_done;
254+        }
255+
256+        if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) {
257+          break;
258+        }
259+        if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) {
260+          break;
261+        }
262+
263+        pbuf_remove_header(seg->p, seg->p->tot_len - seg->len);
264+        if (first_pbuf == NULL) {
265+          first_pbuf = seg->p;
266+        } else {
267+          first_pbuf->tot_len += seg->p->len;
268+          pre_pbuf->next = seg->p;
269+        }
270+
271+        pre_pbuf = seg->p;
272+        next_seqno = seg_seqno + TCP_TCPLEN(seg);
273+        seg = seg->next;
274+        pcb->unsent = seg;
275+        pbuf_chain_len++;
276+      }
277+
278+      if (first_pbuf == NULL) {
279+        err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len);
280+        if (err != ERR_OK)
281+          return err;
282+        pcb->unsent = seg->next;
283+        useg = tcp_output_over(pcb, seg, useg);
284+        seg = pcb->unsent;
285+        continue;
286+      }
287+
288+      struct tcp_seg new_seg;
289+      tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0);
290+
291+      err = tcp_output_seg(pcb, &new_seg, netif, next_seqno);
292+
293+      for (u32_t i = 0; i < pbuf_chain_len; i++) {
294+        struct tcp_seg *next_seg = start_seg->next;
295+        start_seg->p->next = NULL;
296+        useg = tcp_output_over(pcb, start_seg, useg);
297+        start_seg = next_seg;
298+      }
299+
300+      pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr));
301+      new_seg.p->tot_len = new_seg.p->len;
302+    }
303+  } else
304+#endif
305+{
306   while (seg != NULL &&
307          lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
308-#endif
309     LWIP_ASSERT("RST not expected here!",
310                 (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0);
311     /* Stop sending if the nagle algorithm would prevent it
312@@ -1462,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb)
313     }
314     seg = pcb->unsent;
315   }
316+}
317 #if TCP_OVERSIZE
318   if (pcb->unsent == NULL) {
319     /* last unsent has been removed, reset unsent_oversize */
320@@ -1627,7 +1789,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif
321   IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
322 #if CHECKSUM_GEN_TCP_HW
323   if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) {
324-    tcph_cksum_set(seg->p, TCP_HLEN);
325+    tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr));
326     seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
327   } else {
328 #if TCP_CHECKSUM_ON_COPY
329diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h
330index e57be4d..83c9c38 100644
331--- a/src/include/dpdk_cksum.h
332+++ b/src/include/dpdk_cksum.h
333@@ -78,7 +78,7 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) {
334 #include <rte_ip.h>
335
336 static inline void tcph_cksum_set(struct pbuf *p, u16_t len) {
337-    (void)len;
338+    p->l4_len = len;
339     p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
340 }
341
342diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h
343index 87cd960..ef879da 100644
344--- a/src/include/lwip/pbuf.h
345+++ b/src/include/lwip/pbuf.h
346@@ -223,10 +223,14 @@ struct pbuf {
347 #if USE_LIBOS && CHECKSUM_OFFLOAD_ALL
348   /** checksum offload ol_flags */
349   u64_t ol_flags;
350-  /** checksum offload l2_len */
351+  /** L2 (MAC) header length for non-tunneling pkt. */
352   u64_t l2_len:7;
353-  /** checksum offload l3_len */
354+  /** L3 (IP) header length. */
355   u64_t l3_len:9;
356+  /** L4 (TCP/UDP) header length. */
357+  u64_t l4_len:8;
358+  u16_t header_off;
359+  u8_t rexmit;
360 #endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */
361
362   /** In case the user needs to store data custom data on a pbuf */
363diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h
364index a5add21..7c819d0 100644
365--- a/src/include/lwipopts.h
366+++ b/src/include/lwipopts.h
367@@ -173,6 +173,10 @@
368
369 #define ARP_QUEUE_LEN 32
370
371+#define MAX_PBUF_CHAIN_LEN 40
372+
373+#define MIN_TSO_SEG_LEN 256
374+
375 /*  ---------------------------------------
376  *  -------      NIC offloads      --------
377  *  ---------------------------------------
378--
3792.23.0
380
381