• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  linux/net/ipv4/inet_lro.c
3  *
4  *  Large Receive Offload (ipv4 / tcp)
5  *
6  *  (C) Copyright IBM Corp. 2007
7  *
8  *  Authors:
9  *       Jan-Bernd Themann <themann@de.ibm.com>
10  *       Christoph Raisch <raisch@de.ibm.com>
11  *
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2, or (at your option)
16  * any later version.
17  *
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; if not, write to the Free Software
25  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26  */
27 
28 
29 #include <linux/module.h>
30 #include <linux/if_vlan.h>
31 #include <linux/inet_lro.h>
32 #include <net/checksum.h>
33 
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
36 MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
37 
/*
 * Header length helpers.  The doff/ihl header fields are shifted left by
 * two to obtain a byte count that can be subtracted from the IP total
 * length.  Arguments are parenthesized so that any pointer expression
 * (e.g. "&hdr") can be passed safely.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Accepted values of iph->ihl / tcph->doff (same unit as those fields) */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Default header length used for page-based packets that are not parsed */
#define LRO_MAX_PG_HLEN 64

/*
 * Bump one counter in the manager's statistics.  Wrapped in
 * do { } while (0) so the macro behaves as a single statement, including
 * in an unbraced if/else.
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
50 
51 /*
52  * Basic tcp checks whether packet is suitable for LRO
53  */
54 
lro_tcp_ip_check(const struct iphdr * iph,const struct tcphdr * tcph,int len,const struct net_lro_desc * lro_desc)55 static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
56 			    int len, const struct net_lro_desc *lro_desc)
57 {
58         /* check ip header: don't aggregate padded frames */
59 	if (ntohs(iph->tot_len) != len)
60 		return -1;
61 
62 	if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
63 		return -1;
64 
65 	if (iph->ihl != IPH_LEN_WO_OPTIONS)
66 		return -1;
67 
68 	if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
69 	    tcph->rst || tcph->syn || tcph->fin)
70 		return -1;
71 
72 	if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
73 		return -1;
74 
75 	if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
76 	    tcph->doff != TCPH_LEN_W_TIMESTAMP)
77 		return -1;
78 
79 	/* check tcp options (only timestamp allowed) */
80 	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
81 		__be32 *topt = (__be32 *)(tcph + 1);
82 
83 		if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
84 				   | (TCPOPT_TIMESTAMP << 8)
85 				   | TCPOLEN_TIMESTAMP))
86 			return -1;
87 
88 		/* timestamp should be in right order */
89 		topt++;
90 		if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
91 				      ntohl(*topt)))
92 			return -1;
93 
94 		/* timestamp reply should not be zero */
95 		topt++;
96 		if (*topt == 0)
97 			return -1;
98 	}
99 
100 	return 0;
101 }
102 
lro_update_tcp_ip_header(struct net_lro_desc * lro_desc)103 static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
104 {
105 	struct iphdr *iph = lro_desc->iph;
106 	struct tcphdr *tcph = lro_desc->tcph;
107 	__be32 *p;
108 	__wsum tcp_hdr_csum;
109 
110 	tcph->ack_seq = lro_desc->tcp_ack;
111 	tcph->window = lro_desc->tcp_window;
112 
113 	if (lro_desc->tcp_saw_tstamp) {
114 		p = (__be32 *)(tcph + 1);
115 		*(p+2) = lro_desc->tcp_rcv_tsecr;
116 	}
117 
118 	csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
119 	iph->tot_len = htons(lro_desc->ip_tot_len);
120 
121 	tcph->check = 0;
122 	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
123 	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
124 	tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
125 					lro_desc->ip_tot_len -
126 					IP_HDR_LEN(iph), IPPROTO_TCP,
127 					lro_desc->data_csum);
128 }
129 
lro_tcp_data_csum(struct iphdr * iph,struct tcphdr * tcph,int len)130 static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
131 {
132 	__wsum tcp_csum;
133 	__wsum tcp_hdr_csum;
134 	__wsum tcp_ps_hdr_csum;
135 
136 	tcp_csum = ~csum_unfold(tcph->check);
137 	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
138 
139 	tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
140 					     len + TCP_HDR_LEN(tcph),
141 					     IPPROTO_TCP, 0);
142 
143 	return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
144 			tcp_ps_hdr_csum);
145 }
146 
lro_init_desc(struct net_lro_desc * lro_desc,struct sk_buff * skb,struct iphdr * iph,struct tcphdr * tcph)147 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
148 			  struct iphdr *iph, struct tcphdr *tcph)
149 {
150 	int nr_frags;
151 	__be32 *ptr;
152 	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
153 
154 	nr_frags = skb_shinfo(skb)->nr_frags;
155 	lro_desc->parent = skb;
156 	lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
157 	lro_desc->iph = iph;
158 	lro_desc->tcph = tcph;
159 	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
160 	lro_desc->tcp_ack = tcph->ack_seq;
161 	lro_desc->tcp_window = tcph->window;
162 
163 	lro_desc->pkt_aggr_cnt = 1;
164 	lro_desc->ip_tot_len = ntohs(iph->tot_len);
165 
166 	if (tcph->doff == 8) {
167 		ptr = (__be32 *)(tcph+1);
168 		lro_desc->tcp_saw_tstamp = 1;
169 		lro_desc->tcp_rcv_tsval = *(ptr+1);
170 		lro_desc->tcp_rcv_tsecr = *(ptr+2);
171 	}
172 
173 	lro_desc->mss = tcp_data_len;
174 	lro_desc->active = 1;
175 
176 	lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
177 						tcp_data_len);
178 }
179 
lro_clear_desc(struct net_lro_desc * lro_desc)180 static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
181 {
182 	memset(lro_desc, 0, sizeof(struct net_lro_desc));
183 }
184 
lro_add_common(struct net_lro_desc * lro_desc,struct iphdr * iph,struct tcphdr * tcph,int tcp_data_len)185 static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
186 			   struct tcphdr *tcph, int tcp_data_len)
187 {
188 	struct sk_buff *parent = lro_desc->parent;
189 	__be32 *topt;
190 
191 	lro_desc->pkt_aggr_cnt++;
192 	lro_desc->ip_tot_len += tcp_data_len;
193 	lro_desc->tcp_next_seq += tcp_data_len;
194 	lro_desc->tcp_window = tcph->window;
195 	lro_desc->tcp_ack = tcph->ack_seq;
196 
197 	/* don't update tcp_rcv_tsval, would not work with PAWS */
198 	if (lro_desc->tcp_saw_tstamp) {
199 		topt = (__be32 *) (tcph + 1);
200 		lro_desc->tcp_rcv_tsecr = *(topt + 2);
201 	}
202 
203 	lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
204 					     lro_tcp_data_csum(iph, tcph,
205 							       tcp_data_len),
206 					     parent->len);
207 
208 	parent->len += tcp_data_len;
209 	parent->data_len += tcp_data_len;
210 	if (tcp_data_len > lro_desc->mss)
211 		lro_desc->mss = tcp_data_len;
212 }
213 
lro_add_packet(struct net_lro_desc * lro_desc,struct sk_buff * skb,struct iphdr * iph,struct tcphdr * tcph)214 static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
215 			   struct iphdr *iph, struct tcphdr *tcph)
216 {
217 	struct sk_buff *parent = lro_desc->parent;
218 	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
219 
220 	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
221 
222 	skb_pull(skb, (skb->len - tcp_data_len));
223 	parent->truesize += skb->truesize;
224 
225 	if (lro_desc->last_skb)
226 		lro_desc->last_skb->next = skb;
227 	else
228 		skb_shinfo(parent)->frag_list = skb;
229 
230 	lro_desc->last_skb = skb;
231 }
232 
lro_add_frags(struct net_lro_desc * lro_desc,int len,int hlen,int truesize,struct skb_frag_struct * skb_frags,struct iphdr * iph,struct tcphdr * tcph)233 static void lro_add_frags(struct net_lro_desc *lro_desc,
234 			  int len, int hlen, int truesize,
235 			  struct skb_frag_struct *skb_frags,
236 			  struct iphdr *iph, struct tcphdr *tcph)
237 {
238 	struct sk_buff *skb = lro_desc->parent;
239 	int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
240 
241 	lro_add_common(lro_desc, iph, tcph, tcp_data_len);
242 
243 	skb->truesize += truesize;
244 
245 	skb_frags[0].page_offset += hlen;
246 	skb_frag_size_sub(&skb_frags[0], hlen);
247 
248 	while (tcp_data_len > 0) {
249 		*(lro_desc->next_frag) = *skb_frags;
250 		tcp_data_len -= skb_frag_size(skb_frags);
251 		lro_desc->next_frag++;
252 		skb_frags++;
253 		skb_shinfo(skb)->nr_frags++;
254 	}
255 }
256 
lro_check_tcp_conn(struct net_lro_desc * lro_desc,struct iphdr * iph,struct tcphdr * tcph)257 static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
258 			      struct iphdr *iph,
259 			      struct tcphdr *tcph)
260 {
261 	if ((lro_desc->iph->saddr != iph->saddr) ||
262 	    (lro_desc->iph->daddr != iph->daddr) ||
263 	    (lro_desc->tcph->source != tcph->source) ||
264 	    (lro_desc->tcph->dest != tcph->dest))
265 		return -1;
266 	return 0;
267 }
268 
lro_get_desc(struct net_lro_mgr * lro_mgr,struct net_lro_desc * lro_arr,struct iphdr * iph,struct tcphdr * tcph)269 static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
270 					 struct net_lro_desc *lro_arr,
271 					 struct iphdr *iph,
272 					 struct tcphdr *tcph)
273 {
274 	struct net_lro_desc *lro_desc = NULL;
275 	struct net_lro_desc *tmp;
276 	int max_desc = lro_mgr->max_desc;
277 	int i;
278 
279 	for (i = 0; i < max_desc; i++) {
280 		tmp = &lro_arr[i];
281 		if (tmp->active)
282 			if (!lro_check_tcp_conn(tmp, iph, tcph)) {
283 				lro_desc = tmp;
284 				goto out;
285 			}
286 	}
287 
288 	for (i = 0; i < max_desc; i++) {
289 		if (!lro_arr[i].active) {
290 			lro_desc = &lro_arr[i];
291 			goto out;
292 		}
293 	}
294 
295 	LRO_INC_STATS(lro_mgr, no_desc);
296 out:
297 	return lro_desc;
298 }
299 
lro_flush(struct net_lro_mgr * lro_mgr,struct net_lro_desc * lro_desc)300 static void lro_flush(struct net_lro_mgr *lro_mgr,
301 		      struct net_lro_desc *lro_desc)
302 {
303 	if (lro_desc->pkt_aggr_cnt > 1)
304 		lro_update_tcp_ip_header(lro_desc);
305 
306 	skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
307 
308 	if (lro_mgr->features & LRO_F_NAPI)
309 		netif_receive_skb(lro_desc->parent);
310 	else
311 		netif_rx(lro_desc->parent);
312 
313 	LRO_INC_STATS(lro_mgr, flushed);
314 	lro_clear_desc(lro_desc);
315 }
316 
__lro_proc_skb(struct net_lro_mgr * lro_mgr,struct sk_buff * skb,void * priv)317 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
318 			  void *priv)
319 {
320 	struct net_lro_desc *lro_desc;
321 	struct iphdr *iph;
322 	struct tcphdr *tcph;
323 	u64 flags;
324 	int vlan_hdr_len = 0;
325 
326 	if (!lro_mgr->get_skb_header ||
327 	    lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
328 				    &flags, priv))
329 		goto out;
330 
331 	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
332 		goto out;
333 
334 	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
335 	if (!lro_desc)
336 		goto out;
337 
338 	if ((skb->protocol == htons(ETH_P_8021Q)) &&
339 	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
340 		vlan_hdr_len = VLAN_HLEN;
341 
342 	if (!lro_desc->active) { /* start new lro session */
343 		if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
344 			goto out;
345 
346 		skb->ip_summed = lro_mgr->ip_summed_aggr;
347 		lro_init_desc(lro_desc, skb, iph, tcph);
348 		LRO_INC_STATS(lro_mgr, aggregated);
349 		return 0;
350 	}
351 
352 	if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
353 		goto out2;
354 
355 	if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
356 		goto out2;
357 
358 	lro_add_packet(lro_desc, skb, iph, tcph);
359 	LRO_INC_STATS(lro_mgr, aggregated);
360 
361 	if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
362 	    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
363 		lro_flush(lro_mgr, lro_desc);
364 
365 	return 0;
366 
367 out2: /* send aggregated SKBs to stack */
368 	lro_flush(lro_mgr, lro_desc);
369 
370 out:
371 	return 1;
372 }
373 
374 
/*
 * Build an skb for a packet received as page fragments.
 *
 * @frags:     fragments holding the packet
 * @len:       total packet length
 * @true_size: added to the truesize of the new skb
 * @mac_hdr:   start of the packet data to copy the headers from
 * @hlen:      header length; min(@len, @hlen) bytes are copied into the
 *             linear part of the skb
 * @sum:       stored in skb->csum
 * @ip_summed: stored in skb->ip_summed
 *
 * The fragments are attached to the skb, with the copied header bytes
 * cut off the first fragment.  Returns the skb, or NULL if the
 * allocation failed.
 */
static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
				   struct skb_frag_struct *frags,
				   int len, int true_size,
				   void *mac_hdr,
				   int hlen, __wsum sum,
				   u32 ip_summed)
{
	struct skb_frag_struct *first_frag;
	struct sk_buff *skb;
	int linear_len = min(len, hlen);
	int remaining;
	int idx;

	skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
	if (!skb)
		return NULL;

	skb_reserve(skb, lro_mgr->frag_align_pad);

	/* headers live in the linear area, the rest stays in the pages */
	skb->len = len;
	skb->data_len = len - linear_len;
	skb->truesize += true_size;
	skb->tail += linear_len;

	memcpy(skb->data, mac_hdr, linear_len);

	first_frag = skb_shinfo(skb)->frags;
	for (idx = 0, remaining = len; remaining > 0; idx++) {
		first_frag[idx] = frags[idx];
		remaining -= skb_frag_size(&frags[idx]);
		skb_shinfo(skb)->nr_frags++;
	}

	/* the copied header bytes must not show up a second time */
	first_frag->page_offset += linear_len;
	skb_frag_size_sub(first_frag, linear_len);

	skb->ip_summed = ip_summed;
	skb->csum = sum;
	skb->protocol = eth_type_trans(skb, lro_mgr->dev);
	return skb;
}
416 
/*
 * Try to aggregate a packet received as page fragments.
 *
 * @frags:     fragments holding the packet
 * @len:       total packet length
 * @true_size: truesize to account for the packet
 * @priv:      opaque pointer passed through to get_frag_header()
 * @sum:       checksum stored in the skb if the packet is not aggregated
 *
 * Returns NULL if the packet was taken over by LRO (or no skb could be
 * allocated for a new session); otherwise returns an skb generated for
 * the packet, which the caller has to deliver.  When the packet belongs
 * to a running session but cannot be added, that session is flushed
 * first.
 */
static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
					  struct skb_frag_struct *frags,
					  int len, int true_size,
					  void *priv, __wsum sum)
{
	struct net_lro_desc *lro_desc;
	struct sk_buff *skb;
	struct iphdr *iph;
	struct tcphdr *tcph;
	void *mac_hdr;
	u64 flags;
	int hdr_len = LRO_MAX_PG_HLEN;
	int mac_hdr_len;
	int vlan_hdr_len = 0;

	if (!lro_mgr->get_frag_header ||
	    lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
				     (void *)&tcph, &flags, priv)) {
		/* headers unknown: fall back to the start of the fragment */
		mac_hdr = skb_frag_address(frags);
		goto out1;
	}

	if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
		goto out1;

	/* bytes from the mac header up to the end of the TCP header ... */
	hdr_len = (int)((u8 *)tcph + TCP_HDR_LEN(tcph) - (u8 *)mac_hdr);
	/* ... and up to the start of the IP header */
	mac_hdr_len = (int)((u8 *)iph - (u8 *)mac_hdr);

	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
	if (!lro_desc)
		goto out1;

	if (lro_desc->active) { /* continue running lro session */
		if (lro_desc->tcp_next_seq != ntohl(tcph->seq) ||
		    lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
			goto out2;

		lro_add_frags(lro_desc, len, hdr_len, true_size, frags,
			      iph, tcph);
		LRO_INC_STATS(lro_mgr, aggregated);

		if ((skb_shinfo(lro_desc->parent)->nr_frags >=
		     lro_mgr->max_aggr) ||
		    lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
			lro_flush(lro_mgr, lro_desc);

		return NULL;
	}

	/* start new lro session */
	if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
		goto out1;

	skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
			  hdr_len, 0, lro_mgr->ip_summed_aggr);
	if (!skb)
		return NULL;

	if ((skb->protocol == htons(ETH_P_8021Q)) &&
	    !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
		vlan_hdr_len = VLAN_HLEN;

	/* from now on use the header copies inside the new skb */
	iph = (void *)(skb->data + vlan_hdr_len);
	tcph = (void *)((u8 *)skb->data + vlan_hdr_len
			+ IP_HDR_LEN(iph));

	lro_init_desc(lro_desc, skb, iph, tcph);
	LRO_INC_STATS(lro_mgr, aggregated);
	return NULL;

out2: /* send aggregated packets to the stack */
	lro_flush(lro_mgr, lro_desc);

out1:  /* Original packet has to be posted to the stack */
	return lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
			   hdr_len, sum, lro_mgr->ip_summed);
}
495 
lro_receive_skb(struct net_lro_mgr * lro_mgr,struct sk_buff * skb,void * priv)496 void lro_receive_skb(struct net_lro_mgr *lro_mgr,
497 		     struct sk_buff *skb,
498 		     void *priv)
499 {
500 	if (__lro_proc_skb(lro_mgr, skb, priv)) {
501 		if (lro_mgr->features & LRO_F_NAPI)
502 			netif_receive_skb(skb);
503 		else
504 			netif_rx(skb);
505 	}
506 }
507 EXPORT_SYMBOL(lro_receive_skb);
508 
/*
 * Entry point for drivers receiving into page fragments: aggregate the
 * packet if possible; if an skb is handed back instead, pass it on to
 * the stack (netif_receive_skb() with LRO_F_NAPI, netif_rx() without).
 */
void lro_receive_frags(struct net_lro_mgr *lro_mgr,
		       struct skb_frag_struct *frags,
		       int len, int true_size, void *priv, __wsum sum)
{
	struct sk_buff *pending;

	pending = __lro_proc_segment(lro_mgr, frags, len, true_size,
				     priv, sum);
	if (pending) {
		if (lro_mgr->features & LRO_F_NAPI)
			netif_receive_skb(pending);
		else
			netif_rx(pending);
	}
}
EXPORT_SYMBOL(lro_receive_frags);
525 
lro_flush_all(struct net_lro_mgr * lro_mgr)526 void lro_flush_all(struct net_lro_mgr *lro_mgr)
527 {
528 	int i;
529 	struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
530 
531 	for (i = 0; i < lro_mgr->max_desc; i++) {
532 		if (lro_desc[i].active)
533 			lro_flush(lro_mgr, &lro_desc[i]);
534 	}
535 }
536 EXPORT_SYMBOL(lro_flush_all);
537 
lro_flush_pkt(struct net_lro_mgr * lro_mgr,struct iphdr * iph,struct tcphdr * tcph)538 void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
539 		  struct iphdr *iph, struct tcphdr *tcph)
540 {
541 	struct net_lro_desc *lro_desc;
542 
543 	lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
544 	if (lro_desc->active)
545 		lro_flush(lro_mgr, lro_desc);
546 }
547 EXPORT_SYMBOL(lro_flush_pkt);
548