From af8ac36acb103aa27b498dafa0ae8ba4332faac8 Mon Sep 17 00:00:00 2001 From: wu-changsheng Date: Sat, 3 Dec 2022 21:38:09 +0800 Subject: [PATCH] add-tso --- src/core/ipv4/etharp.c | 17 +++- src/core/ipv4/ip4.c | 10 ++- src/core/tcp.c | 6 ++ src/core/tcp_out.c | 178 +++++++++++++++++++++++++++++++++++++-- src/include/dpdk_cksum.h | 2 +- src/include/lwip/pbuf.h | 8 +- src/include/lwipopts.h | 4 + 7 files changed, 211 insertions(+), 14 deletions(-) diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c index effb7db..f1903e4 100644 --- a/src/core/ipv4/etharp.c +++ b/src/core/ipv4/etharp.c @@ -482,6 +482,13 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et struct pbuf *p = arp_table[i].q; arp_table[i].q = NULL; #endif /* ARP_QUEUEING */ +#if USE_LIBOS + struct pbuf *tmp = p->next; + while (tmp != NULL) { + tmp->ref--; + tmp = tmp->next; + } +#endif /* send the queued IP packet */ ethernet_output(netif, p, (struct eth_addr *)(netif->hwaddr), ethaddr, ETHTYPE_IP); /* free the queued IP packet */ @@ -1027,7 +1034,15 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q) } else { /* referencing the old pbuf is enough */ p = q; - pbuf_ref(p); +#if USE_LIBOS + struct pbuf *tmp = p; + while (tmp != NULL) { + pbuf_ref(tmp); + tmp = tmp->next; + } +#else + pbuf_ref(p); +#endif } /* packet could be taken over? */ if (p != NULL) { diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c index 1334cdc..d823491 100644 --- a/src/core/ipv4/ip4.c +++ b/src/core/ipv4/ip4.c @@ -1034,9 +1034,15 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d #endif /* ENABLE_LOOPBACK */ #if IP_FRAG /* don't fragment if interface has mtu set to 0 [loopif] */ - if (netif->mtu && (p->tot_len > netif->mtu)) { - return ip4_frag(p, netif, dest); +#if USE_LIBOS + if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) { +#endif + if (netif->mtu && (p->tot_len > netif->mtu)) { + return ip4_frag(p, netif, dest); + } +#if USE_LIBOS } +#endif #endif /* IP_FRAG */ LWIP_DEBUGF(IP_DEBUG, ("ip4_output_if: call netif->output()\n")); diff --git a/src/core/tcp.c b/src/core/tcp.c index 7c18408..51ada38 100644 --- a/src/core/tcp.c +++ b/src/core/tcp.c @@ -1756,7 +1756,9 @@ tcp_seg_free(struct tcp_seg *seg) seg->p = NULL; #endif /* TCP_DEBUG */ } +#if !USE_LIBOS memp_free(MEMP_TCP_SEG, seg); +#endif } } @@ -1792,10 +1794,14 @@ tcp_seg_copy(struct tcp_seg *seg) LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL); +#if USE_LIBOS + cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom)); +#else cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG); if (cseg == NULL) { return NULL; } +#endif SMEMCPY((u8_t *)cseg, (const u8_t *)seg, sizeof(struct tcp_seg)); pbuf_ref(cseg->p); return cseg; diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c index 2834ba3..ee6f40b 100644 --- a/src/core/tcp_out.c +++ b/src/core/tcp_out.c @@ -161,6 +161,40 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst) * The TCP header is filled in except ackno and wnd. * p is freed on failure. */ +#if USE_LIBOS +void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, + u32_t seqno, u8_t optflags) +{ + u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb); + + seg->flags = optflags; + seg->next = NULL; + seg->p = p; + seg->len = p->tot_len - optlen; + + /* build TCP header */ + pbuf_add_header(p, TCP_HLEN); + seg->tcphdr = (struct tcp_hdr *)seg->p->payload; + seg->tcphdr->src = lwip_htons(pcb->local_port); + seg->tcphdr->dest = lwip_htons(pcb->remote_port); + seg->tcphdr->seqno = lwip_htonl(seqno); + + TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (TCP_HLEN + optlen) / 4, hdrflags); + seg->tcphdr->urgp = 0; +} + +static struct tcp_seg * +tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags) +{ + struct tcp_seg *seg; + + seg = (struct tcp_seg *)((uint8_t *)p + sizeof(struct pbuf_custom)); + + tcp_init_segment(seg, pcb, p, hdrflags, seqno, optflags); + + return seg; +} +#else static struct tcp_seg * tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags) { @@ -210,6 +244,7 @@ tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32 seg->tcphdr->urgp = 0; return seg; } +#endif /** * Allocate a PBUF_RAM pbuf, perhaps with extra space at the end. @@ -1272,6 +1307,60 @@ tcp_build_wnd_scale_option(u32_t *opts) } #endif +#if USE_LIBOS +static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg) +{ + if (TCP_TCPLEN(seg) > 0) { + seg->next = NULL; + if (useg == NULL) { + pcb->unacked = seg; + useg = seg; + } else { + if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) { + /* add segment to before tail of unacked list, keeping the list sorted */ + struct tcp_seg **cur_seg = &(pcb->unacked); + while (*cur_seg && + TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) { + cur_seg = &((*cur_seg)->next ); + } + seg->next = (*cur_seg); + (*cur_seg) = seg; + } else { + /* add segment to tail of unacked list */ + useg->next = seg; + useg = seg; + } + } + } else { + tcp_seg_free(seg); + } + + return useg; +} +static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt) +{ + if (pcb->state != SYN_SENT) { + TCPH_SET_FLAG(seg->tcphdr, TCP_ACK); + } + + err_t err = tcp_output_segment(seg, pcb, netif); + if (err != ERR_OK) { + /* segment could not be sent, for whatever reason */ + tcp_set_flags(pcb, TF_NAGLEMEMERR); + return err; + } + + if (pcb->state != SYN_SENT) { + tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); + } + + if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) { + pcb->snd_nxt = snd_nxt; + } + + return ERR_OK; +} +#endif /** * @ingroup tcp_raw * Find out what we can send and send it @@ -1376,16 +1465,88 @@ tcp_output(struct tcp_pcb *pcb) for (; useg->next != NULL; useg = useg->next); } /* data available and window allows it to be sent? */ + #if USE_LIBOS - /* avoid send cose too much time, limit send pkts num max 10 */ - uint16_t send_pkt = 0; - while (seg != NULL && send_pkt < 10 && - lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { - send_pkt++; -#else + if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) { + while(seg) { + /** + * 1)遍历unsent队列,找到所有的待发送seg. 将seg的buf串起来 + * 2) 生成新的seg, 调用tcp_output_segment, 新的seg释放掉 + * 3) 若成功,则更新snd_nxt, unacked队列,和unsent队列。 + */ + struct tcp_seg *start_seg = seg; + struct pbuf *first_pbuf = NULL; + struct pbuf *pre_pbuf = NULL; + u8_t pbuf_chain_len = 0; + u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno); + while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) { + u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno); + if (seg_seqno - pcb->lastack + seg->len > wnd) { + if (first_pbuf) + break; + else + goto output_done; + } + + if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) { + if (first_pbuf) + break; + else + goto output_done; + } + + if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) { + break; + } + if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) { + break; + } + + pbuf_remove_header(seg->p, seg->p->tot_len - seg->len); + if (first_pbuf == NULL) { + first_pbuf = seg->p; + } else { + first_pbuf->tot_len += seg->p->len; + pre_pbuf->next = seg->p; + } + + pre_pbuf = seg->p; + next_seqno = seg_seqno + TCP_TCPLEN(seg); + seg = seg->next; + pcb->unsent = seg; + pbuf_chain_len++; + } + + if (first_pbuf == NULL) { + err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len); + if (err != ERR_OK) + return err; + pcb->unsent = seg->next; + useg = tcp_output_over(pcb, seg, useg); + seg = pcb->unsent; + continue; + } + + struct tcp_seg new_seg; + tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0); + + err = tcp_output_seg(pcb, &new_seg, netif, next_seqno); + + for (u32_t i = 0; i < pbuf_chain_len; i++) { + struct tcp_seg *next_seg = start_seg->next; + start_seg->p->next = NULL; + useg = tcp_output_over(pcb, start_seg, useg); + start_seg = next_seg; + } + + pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr)); + new_seg.p->tot_len = new_seg.p->len; + } + } else +#endif +{ while (seg != NULL && lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { -#endif LWIP_ASSERT("RST not expected here!", (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0); /* Stop sending if the nagle algorithm would prevent it @@ -1462,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb) } seg = pcb->unsent; } +} #if TCP_OVERSIZE if (pcb->unsent == NULL) { /* last unsent has been removed, reset unsent_oversize */ @@ -1627,7 +1789,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) { #if CHECKSUM_GEN_TCP_HW if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { - tcph_cksum_set(seg->p, TCP_HLEN); + tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr)); seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); } else { #if TCP_CHECKSUM_ON_COPY diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h index e57be4d..83c9c38 100644 --- a/src/include/dpdk_cksum.h +++ b/src/include/dpdk_cksum.h @@ -78,7 +78,7 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) { #include static inline void tcph_cksum_set(struct pbuf *p, u16_t len) { - (void)len; + p->l4_len = len; p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; } diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h index 87cd960..ef879da 100644 --- a/src/include/lwip/pbuf.h +++ b/src/include/lwip/pbuf.h @@ -223,10 +223,14 @@ struct pbuf { #if USE_LIBOS && CHECKSUM_OFFLOAD_ALL /** checksum offload ol_flags */ u64_t ol_flags; - /** checksum offload l2_len */ + /* < L2 (MAC) Header Length for non-tunneling pkt. */ u64_t l2_len:7; - /** checksum offload l3_len */ + /* < L3 (IP) Header Length. */ u64_t l3_len:9; + /* < L4 (TCP/UDP) Header Length. */ + u64_t l4_len:8; + u16_t header_off; + u8_t rexmit; #endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */ /** In case the user needs to store data custom data on a pbuf */ diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h index a5add21..7c819d0 100644 --- a/src/include/lwipopts.h +++ b/src/include/lwipopts.h @@ -173,6 +173,10 @@ #define ARP_QUEUE_LEN 32 +#define MAX_PBUF_CHAIN_LEN 40 + +#define MIN_TSO_SEG_LEN 256 + /* --------------------------------------- * ------- NIC offloads -------- * --------------------------------------- -- 2.23.0