From af8ac36acb103aa27b498dafa0ae8ba4332faac8 Mon Sep 17 00:00:00 2001
From: wu-changsheng <wuchangsheng2@huawei.com>
Date: Sat, 3 Dec 2022 21:38:09 +0800
Subject: [PATCH] add-tso

---
 src/core/ipv4/etharp.c   |  17 +++-
 src/core/ipv4/ip4.c      |  10 ++-
 src/core/tcp.c           |   6 ++
 src/core/tcp_out.c       | 178 +++++++++++++++++++++++++++++++++++++--
 src/include/dpdk_cksum.h |   2 +-
 src/include/lwip/pbuf.h  |   8 +-
 src/include/lwipopts.h   |   4 +
 7 files changed, 211 insertions(+), 14 deletions(-)

diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c
index effb7db..f1903e4 100644
--- a/src/core/ipv4/etharp.c
+++ b/src/core/ipv4/etharp.c
@@ -482,6 +482,13 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et
       struct pbuf *p = arp_table[i].q;
       arp_table[i].q = NULL;
 #endif /* ARP_QUEUEING */
+#if USE_LIBOS
+      struct pbuf *tmp = p->next;
+      while (tmp != NULL) {
+        tmp->ref--;
+        tmp = tmp->next;
+      }
+#endif
       /* send the queued IP packet */
       ethernet_output(netif, p, (struct eth_addr *)(netif->hwaddr), ethaddr, ETHTYPE_IP);
       /* free the queued IP packet */
@@ -1027,7 +1034,15 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q)
     } else {
       /* referencing the old pbuf is enough */
       p = q;
-      pbuf_ref(p);
+#if USE_LIBOS
+      struct pbuf *tmp = p;
+      while (tmp != NULL) {
+        pbuf_ref(tmp);
+        tmp = tmp->next;
+      }
+#else
+      pbuf_ref(p);
+#endif
     }
     /* packet could be taken over? */
     if (p != NULL) {
diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c
index 1334cdc..d823491 100644
--- a/src/core/ipv4/ip4.c
+++ b/src/core/ipv4/ip4.c
@@ -1034,9 +1034,15 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d
 #endif /* ENABLE_LOOPBACK */
 #if IP_FRAG
   /* don't fragment if interface has mtu set to 0 [loopif] */
-  if (netif->mtu && (p->tot_len > netif->mtu)) {
-    return ip4_frag(p, netif, dest);
+#if USE_LIBOS
+  if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) {
+#endif
+    if (netif->mtu && (p->tot_len > netif->mtu)) {
+      return ip4_frag(p, netif, dest);
+    }
+#if USE_LIBOS
   }
+#endif
 #endif /* IP_FRAG */
 
   LWIP_DEBUGF(IP_DEBUG, ("ip4_output_if: call netif->output()\n"));
diff --git a/src/core/tcp.c b/src/core/tcp.c
index 7c18408..51ada38 100644
--- a/src/core/tcp.c
+++ b/src/core/tcp.c
@@ -1756,7 +1756,9 @@ tcp_seg_free(struct tcp_seg *seg)
       seg->p = NULL;
 #endif /* TCP_DEBUG */
     }
+#if !USE_LIBOS
     memp_free(MEMP_TCP_SEG, seg);
+#endif
   }
 }
 
@@ -1792,10 +1794,14 @@ tcp_seg_copy(struct tcp_seg *seg)
 
   LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL);
 
+#if USE_LIBOS
+  cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom));
+#else
   cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG);
   if (cseg == NULL) {
     return NULL;
   }
+#endif
   SMEMCPY((u8_t *)cseg, (const u8_t *)seg, sizeof(struct tcp_seg));
   pbuf_ref(cseg->p);
   return cseg;
diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c
index 2834ba3..ee6f40b 100644
--- a/src/core/tcp_out.c
+++ b/src/core/tcp_out.c
@@ -161,6 +161,40 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst)
  * The TCP header is filled in except ackno and wnd.
  * p is freed on failure.
  */
+#if USE_LIBOS
+void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags,
+                      u32_t seqno, u8_t optflags)
+{
+    u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
+
+    seg->flags = optflags;
+    seg->next = NULL;
+    seg->p = p;
+    seg->len = p->tot_len - optlen;
+
+    /* build TCP header */
+    pbuf_add_header(p, TCP_HLEN);
+    seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
+    seg->tcphdr->src = lwip_htons(pcb->local_port);
+    seg->tcphdr->dest = lwip_htons(pcb->remote_port);
+    seg->tcphdr->seqno = lwip_htonl(seqno);
+
+    TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (TCP_HLEN + optlen) / 4, hdrflags);
+    seg->tcphdr->urgp = 0;
+}
+
+static struct tcp_seg *
+tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
+{
+    struct tcp_seg *seg;
+
+    seg = (struct tcp_seg *)((uint8_t *)p + sizeof(struct pbuf_custom));
+
+    tcp_init_segment(seg, pcb, p, hdrflags, seqno, optflags);
+
+    return seg;
+}
+#else
 static struct tcp_seg *
 tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
 {
@@ -210,6 +244,7 @@ tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32
   seg->tcphdr->urgp = 0;
   return seg;
 }
+#endif
 
 /**
  * Allocate a PBUF_RAM pbuf, perhaps with extra space at the end.
@@ -1272,6 +1307,60 @@ tcp_build_wnd_scale_option(u32_t *opts)
 }
 #endif
 
+#if USE_LIBOS
+static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg)
+{
+    if (TCP_TCPLEN(seg) > 0) {
+        seg->next = NULL;
+        if (useg == NULL) {
+            pcb->unacked = seg;
+            useg = seg;
+        } else {
+            if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) {
+                /* add segment to before tail of unacked list, keeping the list sorted */
+                struct tcp_seg **cur_seg = &(pcb->unacked);
+                while (*cur_seg &&
+                       TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
+                    cur_seg = &((*cur_seg)->next );
+                }
+                seg->next = (*cur_seg);
+                (*cur_seg) = seg;
+            } else {
+                /* add segment to tail of unacked list */
+                useg->next = seg;
+                useg = seg;
+            }
+        }
+    } else {
+        tcp_seg_free(seg);
+    }
+
+    return useg;
+}
+static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt)
+{
+    if (pcb->state != SYN_SENT) {
+        TCPH_SET_FLAG(seg->tcphdr, TCP_ACK);
+    }
+
+    err_t err = tcp_output_segment(seg, pcb, netif);
+    if (err != ERR_OK) {
+        /* segment could not be sent, for whatever reason */
+        tcp_set_flags(pcb, TF_NAGLEMEMERR);
+        return err;
+    }
+
+    if (pcb->state != SYN_SENT) {
+        tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
+    }
+
+    if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) {
+        pcb->snd_nxt = snd_nxt;
+    }
+
+    return ERR_OK;
+}
+#endif
 /**
  * @ingroup tcp_raw
  * Find out what we can send and send it
@@ -1376,16 +1465,88 @@ tcp_output(struct tcp_pcb *pcb)
     for (; useg->next != NULL; useg = useg->next);
   }
   /* data available and window allows it to be sent? */
+
 #if USE_LIBOS
-  /* avoid send cose too much time, limit send pkts num max 10 */
-  uint16_t send_pkt = 0;
-  while (seg != NULL && send_pkt < 10 &&
-         lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
-    send_pkt++;
-#else
+    if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) {
+        while(seg) {
+            /**
+             * 1) Walk the unsent queue, collect every segment that is ready to send, and chain their pbufs together.
+             * 2) Build a new segment from the chain, call tcp_output_segment(), then release the new segment.
+             * 3) On success, update snd_nxt, the unacked queue and the unsent queue.
+             */
+            struct tcp_seg *start_seg = seg;
+            struct pbuf *first_pbuf = NULL;
+            struct pbuf *pre_pbuf = NULL;
+            u8_t pbuf_chain_len = 0;
+            u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno);
+            while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) {
+                u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno);
+                if (seg_seqno - pcb->lastack + seg->len > wnd) {
+                    if (first_pbuf)
+                        break;
+                    else
+                        goto output_done;
+                }
+
+                if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) {
+                    if (first_pbuf)
+                        break;
+                    else
+                        goto output_done;
+                }
+
+                if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) {
+                    break;
+                }
+                if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) {
+                    break;
+                }
+
+                pbuf_remove_header(seg->p, seg->p->tot_len - seg->len);
+                if (first_pbuf == NULL) {
+                    first_pbuf = seg->p;
+                } else {
+                    first_pbuf->tot_len += seg->p->len;
+                    pre_pbuf->next = seg->p;
+                }
+
+                pre_pbuf = seg->p;
+                next_seqno = seg_seqno + TCP_TCPLEN(seg);
+                seg = seg->next;
+                pcb->unsent = seg;
+                pbuf_chain_len++;
+            }
+
+            if (first_pbuf == NULL) {
+                err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len);
+                if (err != ERR_OK)
+                    return err;
+                pcb->unsent = seg->next;
+                useg = tcp_output_over(pcb, seg, useg);
+                seg = pcb->unsent;
+                continue;
+            }
+
+            struct tcp_seg new_seg;
+            tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0);
+
+            err = tcp_output_seg(pcb, &new_seg, netif, next_seqno);
+
+            for (u32_t i = 0; i < pbuf_chain_len; i++) {
+                struct tcp_seg *next_seg = start_seg->next;
+                start_seg->p->next = NULL;
+                useg = tcp_output_over(pcb, start_seg, useg);
+                start_seg = next_seg;
+            }
+
+            pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr));
+            new_seg.p->tot_len = new_seg.p->len;
+        }
+    } else
+#endif
+{
   while (seg != NULL &&
          lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
-#endif
     LWIP_ASSERT("RST not expected here!",
                 (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0);
     /* Stop sending if the nagle algorithm would prevent it
@@ -1462,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb)
     }
     seg = pcb->unsent;
   }
+}
 #if TCP_OVERSIZE
   if (pcb->unsent == NULL) {
     /* last unsent has been removed, reset unsent_oversize */
@@ -1627,7 +1789,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif
   IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
 #if CHECKSUM_GEN_TCP_HW
     if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) {
-      tcph_cksum_set(seg->p, TCP_HLEN);
+      tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr));
       seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
     } else {
 #if TCP_CHECKSUM_ON_COPY
diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h
index e57be4d..83c9c38 100644
--- a/src/include/dpdk_cksum.h
+++ b/src/include/dpdk_cksum.h
@@ -78,7 +78,7 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) {
 #include <rte_ip.h>
 
 static inline void tcph_cksum_set(struct pbuf *p, u16_t len) {
-    (void)len;
+    p->l4_len = len;
     p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
 }
 
diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h
index 87cd960..ef879da 100644
--- a/src/include/lwip/pbuf.h
+++ b/src/include/lwip/pbuf.h
@@ -223,10 +223,14 @@ struct pbuf {
 #if USE_LIBOS && CHECKSUM_OFFLOAD_ALL
   /** checksum offload ol_flags */
   u64_t ol_flags;
-  /** checksum offload l2_len */
+  /**< L2 (MAC) Header Length for non-tunneling pkt. */
   u64_t l2_len:7;
-  /** checksum offload l3_len */
+  /**< L3 (IP) Header Length. */
   u64_t l3_len:9;
+  /**< L4 (TCP/UDP) Header Length. */
+  u64_t l4_len:8;
+  u16_t header_off;
+  u8_t rexmit;
 #endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */
 
   /** In case the user needs to store data custom data on a pbuf */
diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h
index a5add21..7c819d0 100644
--- a/src/include/lwipopts.h
+++ b/src/include/lwipopts.h
@@ -173,6 +173,10 @@
 
 #define ARP_QUEUE_LEN 32
 
+#define MAX_PBUF_CHAIN_LEN 40
+
+#define MIN_TSO_SEG_LEN 256
+
 /* ---------------------------------------
  * -------       NIC offloads     --------
  * ---------------------------------------
-- 
2.23.0
