• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright 2013 Red Hat, Inc.
4  * Author: Daniel Borkmann <dborkman@redhat.com>
5  *         Chetan Loke <loke.chetan@gmail.com> (TPACKET_V3 usage example)
6  *
7  * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
8  *
9  * Control:
10  *   Test the setup of the TPACKET socket with different patterns that are
11  *   known to fail (TODO) resp. succeed (OK).
12  *
13  * Datapath:
14  *   Open a pair of packet sockets and send resp. receive an a priori known
15  *   packet pattern across the sockets and check if it was received resp.
16  *   sent correctly. Fanout in combination with RX_RING is currently not
17  *   tested here.
18  *
19  *   The test currently runs for
20  *   - TPACKET_V1: RX_RING, TX_RING
21  *   - TPACKET_V2: RX_RING, TX_RING
22  *   - TPACKET_V3: RX_RING
23  */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <sys/socket.h>
30 #include <sys/mman.h>
31 #include <sys/utsname.h>
32 #include <linux/if_packet.h>
33 #include <linux/filter.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #ifndef __ANDROID__
38 #include <bits/wordsize.h>
39 #endif
40 #include <net/ethernet.h>
41 #include <netinet/ip.h>
42 #include <arpa/inet.h>
43 #include <stdint.h>
44 #include <string.h>
45 #include <assert.h>
46 #include <net/if.h>
47 #include <inttypes.h>
48 #include <poll.h>
49 
50 #include "psock_lib.h"
51 
52 #include "../kselftest.h"
53 
/* Abort through assert() when @cond, which must never hold, is true. */
#ifndef bug_on
# define bug_on(cond)		assert(!(cond))
#endif

/* Attribute: align the declaration to TPACKET_ALIGNMENT bytes. */
#ifndef __aligned_tpacket
# define __aligned_tpacket	__attribute__((aligned(TPACKET_ALIGNMENT)))
#endif

/* Attribute: align the declaration to TPACKET_ALIGN(x) bytes. */
#ifndef __align_tpacket
# define __align_tpacket(x)	__attribute__((aligned(TPACKET_ALIGN(x))))
#endif

/* Number of packets each data-path test sends. */
#define NUM_PACKETS		100
/* Round x up to the next multiple of 8. */
#define ALIGN_8(x)		(((x) + 7) & ~7)
68 
/* Book-keeping for one mmap()ed packet ring under test. */
struct ring {
	/* Descriptor array; each entry points at one slot inside mm_space. */
	struct iovec *rd;
	/* Start of the mapped ring memory. */
	uint8_t *mm_space;
	/* Length of the mapping in bytes. */
	size_t mm_len;
	/* Size in bytes of the rd array. */
	size_t rd_len;
	/* Link-layer address filled in and used by bind_ring(). */
	struct sockaddr_ll ll;
	/* Version-specific routine that runs the data-path test. */
	void (*walk)(int sock, struct ring *ring);
	/* PACKET_RX_RING or PACKET_TX_RING. */
	int type;
	/* Number of entries in rd. */
	int rd_num;
	/* Bytes covered by one rd entry. */
	int flen;
	/* TPACKET_V1, TPACKET_V2 or TPACKET_V3. */
	int version;
	/* Ring request handed to setsockopt(); req3 is used for TPACKET_V3. */
	union {
		struct tpacket_req  req;
		struct tpacket_req3 req3;
	};
};
81 
/*
 * View of the start of a TPACKET_V3 block; walk_v3_rx() casts each ring
 * slot's base address to this type.
 */
struct block_desc {
	uint32_t version;
	uint32_t offset_to_priv;
	/* Block header: status, packet count, length, sequence number, ... */
	struct tpacket_hdr_v1 h1;
};
87 
88 union frame_map {
89 	struct {
90 		struct tpacket_hdr tp_h __aligned_tpacket;
91 		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
92 	} *v1;
93 	struct {
94 		struct tpacket2_hdr tp_h __aligned_tpacket;
95 		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
96 	} *v2;
97 	void *raw;
98 };
99 
/* Running packet count for the current test; reset in setup_ring(). */
static unsigned int total_packets;
/* Running byte count for the current test; reset in setup_ring(). */
static unsigned int total_bytes;
101 
/*
 * Open a raw PF_PACKET socket and select TPACKET version @ver on it.
 *
 * Returns the socket descriptor. If socket() or setsockopt() fails, the
 * error is reported with perror() and the process exits with status 1.
 */
static int pfsocket(int ver)
{
	int fd;

	fd = socket(PF_PACKET, SOCK_RAW, 0);
	if (fd == -1) {
		perror("socket");
		exit(1);
	}

	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver,
		       sizeof(ver)) == -1) {
		perror("setsockopt");
		exit(1);
	}

	return fd;
}
118 
status_bar_update(void)119 static void status_bar_update(void)
120 {
121 	if (total_packets % 10 == 0) {
122 		fprintf(stderr, ".");
123 		fflush(stderr);
124 	}
125 }
126 
/*
 * Sanity-check a received frame.
 *
 * @pay: start of the Ethernet header
 * @len: number of captured bytes
 *
 * Exits with status 1 if the frame is shorter than an Ethernet header or if
 * its ethertype is not ETH_P_IP; otherwise returns normally.
 */
static void test_payload(void *pay, size_t len)
{
	const struct ethhdr *hdr = pay;

	if (len < sizeof(*hdr)) {
		fprintf(stderr, "test_payload: packet too "
			"small: %zu bytes!\n", len);
		exit(1);
	}

	if (hdr->h_proto == htons(ETH_P_IP))
		return;

	fprintf(stderr, "test_payload: wrong ethernet "
		"type: 0x%x!\n", ntohs(hdr->h_proto));
	exit(1);
}
143 
create_payload(void * pay,size_t * len)144 static void create_payload(void *pay, size_t *len)
145 {
146 	int i;
147 	struct ethhdr *eth = pay;
148 	struct iphdr *ip = pay + sizeof(*eth);
149 
150 	/* Lets create some broken crap, that still passes
151 	 * our BPF filter.
152 	 */
153 
154 	*len = DATA_LEN + 42;
155 
156 	memset(pay, 0xff, ETH_ALEN * 2);
157 	eth->h_proto = htons(ETH_P_IP);
158 
159 	for (i = 0; i < sizeof(*ip); ++i)
160 		((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
161 
162 	ip->ihl = 5;
163 	ip->version = 4;
164 	ip->protocol = 0x11;
165 	ip->frag_off = 0;
166 	ip->ttl = 64;
167 	ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
168 
169 	ip->saddr = htonl(INADDR_LOOPBACK);
170 	ip->daddr = htonl(INADDR_LOOPBACK);
171 
172 	memset(pay + sizeof(*eth) + sizeof(*ip),
173 	       DATA_CHAR, DATA_LEN);
174 }
175 
/* Return 1 if TP_STATUS_USER is set in the V1 frame's status, else 0. */
static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
{
	const int user_owned =
		(hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER;

	return user_owned;
}
180 
/*
 * Mark a V1 RX frame as TP_STATUS_KERNEL, then issue a full memory barrier
 * (__sync_synchronize).
 */
static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;

	/* The barrier comes after the status store. */
	__sync_synchronize();
}
186 
/* Return 1 if TP_STATUS_USER is set in the V2 frame's status, else 0. */
static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
{
	const int user_owned =
		(hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER;

	return user_owned;
}
191 
/*
 * Mark a V2 RX frame as TP_STATUS_KERNEL, then issue a full memory barrier
 * (__sync_synchronize).
 */
static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_KERNEL;

	/* The barrier comes after the status store. */
	__sync_synchronize();
}
197 
/*
 * Version dispatcher for the RX "frame is user-owned" check.
 *
 * @base:    start of the frame (its tpacket header)
 * @version: TPACKET_V1 or TPACKET_V2; anything else trips bug_on()
 *
 * Returns the result of the matching per-version helper.
 */
static inline int __v1_v2_rx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_rx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_rx_kernel_ready(base);

	bug_on(1);
	return 0;
}
210 
/*
 * Version dispatcher that marks an RX frame as TP_STATUS_KERNEL.
 * Versions other than TPACKET_V1 / TPACKET_V2 are silently ignored.
 */
static inline void __v1_v2_rx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_rx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_rx_user_ready(base);
}
222 
walk_v1_v2_rx(int sock,struct ring * ring)223 static void walk_v1_v2_rx(int sock, struct ring *ring)
224 {
225 	struct pollfd pfd;
226 	int udp_sock[2];
227 	union frame_map ppd;
228 	unsigned int frame_num = 0;
229 
230 	bug_on(ring->type != PACKET_RX_RING);
231 
232 	pair_udp_open(udp_sock, PORT_BASE);
233 
234 	memset(&pfd, 0, sizeof(pfd));
235 	pfd.fd = sock;
236 	pfd.events = POLLIN | POLLERR;
237 	pfd.revents = 0;
238 
239 	pair_udp_send(udp_sock, NUM_PACKETS);
240 
241 	while (total_packets < NUM_PACKETS * 2) {
242 		while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
243 					       ring->version)) {
244 			ppd.raw = ring->rd[frame_num].iov_base;
245 
246 			switch (ring->version) {
247 			case TPACKET_V1:
248 				test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
249 					     ppd.v1->tp_h.tp_snaplen);
250 				total_bytes += ppd.v1->tp_h.tp_snaplen;
251 				break;
252 
253 			case TPACKET_V2:
254 				test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
255 					     ppd.v2->tp_h.tp_snaplen);
256 				total_bytes += ppd.v2->tp_h.tp_snaplen;
257 				break;
258 			}
259 
260 			status_bar_update();
261 			total_packets++;
262 
263 			__v1_v2_rx_user_ready(ppd.raw, ring->version);
264 
265 			frame_num = (frame_num + 1) % ring->rd_num;
266 		}
267 
268 		poll(&pfd, 1, 1);
269 	}
270 
271 	pair_udp_close(udp_sock);
272 
273 	if (total_packets != 2 * NUM_PACKETS) {
274 		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
275 			ring->version, total_packets, NUM_PACKETS);
276 		exit(1);
277 	}
278 
279 	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
280 }
281 
/*
 * Return 1 if the V1 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set, else 0.
 */
static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
{
	return (hdr->tp_status &
		(TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)) == 0;
}
286 
/*
 * Mark a V1 TX frame as TP_STATUS_SEND_REQUEST, then issue a full memory
 * barrier (__sync_synchronize).
 */
static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	/* The barrier comes after the status store. */
	__sync_synchronize();
}
292 
/*
 * Return 1 if the V2 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set, else 0.
 */
static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
{
	return (hdr->tp_status &
		(TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)) == 0;
}
297 
/*
 * Mark a V2 TX frame as TP_STATUS_SEND_REQUEST, then issue a full memory
 * barrier (__sync_synchronize).
 */
static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	/* The barrier comes after the status store. */
	__sync_synchronize();
}
303 
/*
 * Return 1 if the V3 TX frame has neither TP_STATUS_SEND_REQUEST nor
 * TP_STATUS_SENDING set, else 0.
 */
static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
{
	return (hdr->tp_status &
		(TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)) == 0;
}
308 
/*
 * Mark a V3 TX frame as TP_STATUS_SEND_REQUEST, then issue a full memory
 * barrier (__sync_synchronize).
 */
static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
{
	hdr->tp_status = TP_STATUS_SEND_REQUEST;

	/* The barrier comes after the status store. */
	__sync_synchronize();
}
314 
/*
 * Version dispatcher for the TX "frame may be filled" check.
 *
 * @base:    start of the frame (its tpacket header)
 * @version: TPACKET_V1, TPACKET_V2 or TPACKET_V3; anything else trips
 *           bug_on()
 *
 * Returns the result of the matching per-version helper.
 */
static inline int __tx_kernel_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		return __v1_tx_kernel_ready(base);
	if (version == TPACKET_V2)
		return __v2_tx_kernel_ready(base);
	if (version == TPACKET_V3)
		return __v3_tx_kernel_ready(base);

	bug_on(1);
	return 0;
}
329 
/*
 * Version dispatcher that marks a TX frame as TP_STATUS_SEND_REQUEST.
 * Versions other than TPACKET_V1 / V2 / V3 are silently ignored.
 */
static inline void __tx_user_ready(void *base, int version)
{
	if (version == TPACKET_V1)
		__v1_tx_user_ready(base);
	else if (version == TPACKET_V2)
		__v2_tx_user_ready(base);
	else if (version == TPACKET_V3)
		__v3_tx_user_ready(base);
}
344 
/*
 * Set the PACKET_LOSS socket option to 1 on @sock.
 * Reports the error with perror() and exits with status 1 on failure.
 */
static void __v1_v2_set_packet_loss_discard(int sock)
{
	int discard = 1;

	if (setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard,
		       sizeof(discard)) == -1) {
		perror("setsockopt");
		exit(1);
	}
}
356 
get_next_frame(struct ring * ring,int n)357 static inline void *get_next_frame(struct ring *ring, int n)
358 {
359 	uint8_t *f0 = ring->rd[0].iov_base;
360 
361 	switch (ring->version) {
362 	case TPACKET_V1:
363 	case TPACKET_V2:
364 		return ring->rd[n].iov_base;
365 	case TPACKET_V3:
366 		return f0 + (n * ring->req3.tp_frame_size);
367 	default:
368 		bug_on(1);
369 		return NULL;
370 	}
371 }
372 
walk_tx(int sock,struct ring * ring)373 static void walk_tx(int sock, struct ring *ring)
374 {
375 	struct pollfd pfd;
376 	int rcv_sock, ret;
377 	size_t packet_len;
378 	union frame_map ppd;
379 	char packet[1024];
380 	unsigned int frame_num = 0, got = 0;
381 	struct sockaddr_ll ll = {
382 		.sll_family = PF_PACKET,
383 		.sll_halen = ETH_ALEN,
384 	};
385 	int nframes;
386 
387 	/* TPACKET_V{1,2} sets up the ring->rd* related variables based
388 	 * on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
389 	 * up based on blocks (e.g, rd_num is  tp_block_nr)
390 	 */
391 	if (ring->version <= TPACKET_V2)
392 		nframes = ring->rd_num;
393 	else
394 		nframes = ring->req3.tp_frame_nr;
395 
396 	bug_on(ring->type != PACKET_TX_RING);
397 	bug_on(nframes < NUM_PACKETS);
398 
399 	rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
400 	if (rcv_sock == -1) {
401 		perror("socket");
402 		exit(1);
403 	}
404 
405 	pair_udp_setfilter(rcv_sock);
406 
407 	ll.sll_ifindex = if_nametoindex("lo");
408 	ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
409 	if (ret == -1) {
410 		perror("bind");
411 		exit(1);
412 	}
413 
414 	memset(&pfd, 0, sizeof(pfd));
415 	pfd.fd = sock;
416 	pfd.events = POLLOUT | POLLERR;
417 	pfd.revents = 0;
418 
419 	total_packets = NUM_PACKETS;
420 	create_payload(packet, &packet_len);
421 
422 	while (total_packets > 0) {
423 		void *next = get_next_frame(ring, frame_num);
424 
425 		while (__tx_kernel_ready(next, ring->version) &&
426 		       total_packets > 0) {
427 			ppd.raw = next;
428 
429 			switch (ring->version) {
430 			case TPACKET_V1:
431 				ppd.v1->tp_h.tp_snaplen = packet_len;
432 				ppd.v1->tp_h.tp_len = packet_len;
433 
434 				memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
435 				       sizeof(struct sockaddr_ll), packet,
436 				       packet_len);
437 				total_bytes += ppd.v1->tp_h.tp_snaplen;
438 				break;
439 
440 			case TPACKET_V2:
441 				ppd.v2->tp_h.tp_snaplen = packet_len;
442 				ppd.v2->tp_h.tp_len = packet_len;
443 
444 				memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
445 				       sizeof(struct sockaddr_ll), packet,
446 				       packet_len);
447 				total_bytes += ppd.v2->tp_h.tp_snaplen;
448 				break;
449 			case TPACKET_V3: {
450 				struct tpacket3_hdr *tx = next;
451 
452 				tx->tp_snaplen = packet_len;
453 				tx->tp_len = packet_len;
454 				tx->tp_next_offset = 0;
455 
456 				memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
457 				       sizeof(struct sockaddr_ll), packet,
458 				       packet_len);
459 				total_bytes += tx->tp_snaplen;
460 				break;
461 			}
462 			}
463 
464 			status_bar_update();
465 			total_packets--;
466 
467 			__tx_user_ready(next, ring->version);
468 
469 			frame_num = (frame_num + 1) % nframes;
470 		}
471 
472 		poll(&pfd, 1, 1);
473 	}
474 
475 	bug_on(total_packets != 0);
476 
477 	ret = sendto(sock, NULL, 0, 0, NULL, 0);
478 	if (ret == -1) {
479 		perror("sendto");
480 		exit(1);
481 	}
482 
483 	while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
484 			       0, NULL, NULL)) > 0 &&
485 	       total_packets < NUM_PACKETS) {
486 		got += ret;
487 		test_payload(packet, ret);
488 
489 		status_bar_update();
490 		total_packets++;
491 	}
492 
493 	close(rcv_sock);
494 
495 	if (total_packets != NUM_PACKETS) {
496 		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
497 			ring->version, total_packets, NUM_PACKETS);
498 		exit(1);
499 	}
500 
501 	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
502 }
503 
walk_v1_v2(int sock,struct ring * ring)504 static void walk_v1_v2(int sock, struct ring *ring)
505 {
506 	if (ring->type == PACKET_RX_RING)
507 		walk_v1_v2_rx(sock, ring);
508 	else
509 		walk_tx(sock, ring);
510 }
511 
/* Sequence number of the last V3 block checked; starts at zero. */
static uint64_t __v3_prev_block_seq_num;
513 
__v3_test_block_seq_num(struct block_desc * pbd)514 void __v3_test_block_seq_num(struct block_desc *pbd)
515 {
516 	if (__v3_prev_block_seq_num + 1 != pbd->h1.seq_num) {
517 		fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
518 			"seq:%"PRIu64" != actual seq:%"PRIu64"\n",
519 			__v3_prev_block_seq_num, __v3_prev_block_seq_num + 1,
520 			(uint64_t) pbd->h1.seq_num);
521 		exit(1);
522 	}
523 
524 	__v3_prev_block_seq_num = pbd->h1.seq_num;
525 }
526 
__v3_test_block_len(struct block_desc * pbd,uint32_t bytes,int block_num)527 static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
528 {
529 	if (pbd->h1.num_pkts && bytes != pbd->h1.blk_len) {
530 		fprintf(stderr, "\nblock:%u with %upackets, expected "
531 			"len:%u != actual len:%u\n", block_num,
532 			pbd->h1.num_pkts, bytes, pbd->h1.blk_len);
533 		exit(1);
534 	}
535 }
536 
__v3_test_block_header(struct block_desc * pbd,const int block_num)537 static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
538 {
539 	if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
540 		fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
541 		exit(1);
542 	}
543 
544 	__v3_test_block_seq_num(pbd);
545 }
546 
__v3_walk_block(struct block_desc * pbd,const int block_num)547 static void __v3_walk_block(struct block_desc *pbd, const int block_num)
548 {
549 	int num_pkts = pbd->h1.num_pkts, i;
550 	unsigned long bytes = 0, bytes_with_padding = ALIGN_8(sizeof(*pbd));
551 	struct tpacket3_hdr *ppd;
552 
553 	__v3_test_block_header(pbd, block_num);
554 
555 	ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
556 				       pbd->h1.offset_to_first_pkt);
557 
558 	for (i = 0; i < num_pkts; ++i) {
559 		bytes += ppd->tp_snaplen;
560 
561 		if (ppd->tp_next_offset)
562 			bytes_with_padding += ppd->tp_next_offset;
563 		else
564 			bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
565 
566 		test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
567 
568 		status_bar_update();
569 		total_packets++;
570 
571 		ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
572 		__sync_synchronize();
573 	}
574 
575 	__v3_test_block_len(pbd, bytes_with_padding, block_num);
576 	total_bytes += bytes;
577 }
578 
__v3_flush_block(struct block_desc * pbd)579 void __v3_flush_block(struct block_desc *pbd)
580 {
581 	pbd->h1.block_status = TP_STATUS_KERNEL;
582 	__sync_synchronize();
583 }
584 
walk_v3_rx(int sock,struct ring * ring)585 static void walk_v3_rx(int sock, struct ring *ring)
586 {
587 	unsigned int block_num = 0;
588 	struct pollfd pfd;
589 	struct block_desc *pbd;
590 	int udp_sock[2];
591 
592 	bug_on(ring->type != PACKET_RX_RING);
593 
594 	pair_udp_open(udp_sock, PORT_BASE);
595 
596 	memset(&pfd, 0, sizeof(pfd));
597 	pfd.fd = sock;
598 	pfd.events = POLLIN | POLLERR;
599 	pfd.revents = 0;
600 
601 	pair_udp_send(udp_sock, NUM_PACKETS);
602 
603 	while (total_packets < NUM_PACKETS * 2) {
604 		pbd = (struct block_desc *) ring->rd[block_num].iov_base;
605 
606 		while ((pbd->h1.block_status & TP_STATUS_USER) == 0)
607 			poll(&pfd, 1, 1);
608 
609 		__v3_walk_block(pbd, block_num);
610 		__v3_flush_block(pbd);
611 
612 		block_num = (block_num + 1) % ring->rd_num;
613 	}
614 
615 	pair_udp_close(udp_sock);
616 
617 	if (total_packets != 2 * NUM_PACKETS) {
618 		fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
619 			total_packets, NUM_PACKETS);
620 		exit(1);
621 	}
622 
623 	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
624 }
625 
walk_v3(int sock,struct ring * ring)626 static void walk_v3(int sock, struct ring *ring)
627 {
628 	if (ring->type == PACKET_RX_RING)
629 		walk_v3_rx(sock, ring);
630 	else
631 		walk_tx(sock, ring);
632 }
633 
__v1_v2_fill(struct ring * ring,unsigned int blocks)634 static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
635 {
636 	ring->req.tp_block_size = getpagesize() << 2;
637 	ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
638 	ring->req.tp_block_nr = blocks;
639 
640 	ring->req.tp_frame_nr = ring->req.tp_block_size /
641 				ring->req.tp_frame_size *
642 				ring->req.tp_block_nr;
643 
644 	ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
645 	ring->walk = walk_v1_v2;
646 	ring->rd_num = ring->req.tp_frame_nr;
647 	ring->flen = ring->req.tp_frame_size;
648 }
649 
__v3_fill(struct ring * ring,unsigned int blocks,int type)650 static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
651 {
652 	if (type == PACKET_RX_RING) {
653 		ring->req3.tp_retire_blk_tov = 64;
654 		ring->req3.tp_sizeof_priv = 0;
655 		ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
656 	}
657 	ring->req3.tp_block_size = getpagesize() << 2;
658 	ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
659 	ring->req3.tp_block_nr = blocks;
660 
661 	ring->req3.tp_frame_nr = ring->req3.tp_block_size /
662 				 ring->req3.tp_frame_size *
663 				 ring->req3.tp_block_nr;
664 
665 	ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
666 	ring->walk = walk_v3;
667 	ring->rd_num = ring->req3.tp_block_nr;
668 	ring->flen = ring->req3.tp_block_size;
669 }
670 
setup_ring(int sock,struct ring * ring,int version,int type)671 static void setup_ring(int sock, struct ring *ring, int version, int type)
672 {
673 	int ret = 0;
674 	unsigned int blocks = 256;
675 
676 	ring->type = type;
677 	ring->version = version;
678 
679 	switch (version) {
680 	case TPACKET_V1:
681 	case TPACKET_V2:
682 		if (type == PACKET_TX_RING)
683 			__v1_v2_set_packet_loss_discard(sock);
684 		__v1_v2_fill(ring, blocks);
685 		ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
686 				 sizeof(ring->req));
687 		break;
688 
689 	case TPACKET_V3:
690 		__v3_fill(ring, blocks, type);
691 		ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
692 				 sizeof(ring->req3));
693 		break;
694 	}
695 
696 	if (ret == -1) {
697 		perror("setsockopt");
698 		exit(1);
699 	}
700 
701 	ring->rd_len = ring->rd_num * sizeof(*ring->rd);
702 	ring->rd = malloc(ring->rd_len);
703 	if (ring->rd == NULL) {
704 		perror("malloc");
705 		exit(1);
706 	}
707 
708 	total_packets = 0;
709 	total_bytes = 0;
710 }
711 
mmap_ring(int sock,struct ring * ring)712 static void mmap_ring(int sock, struct ring *ring)
713 {
714 	int i;
715 
716 	ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
717 			      MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
718 	if (ring->mm_space == MAP_FAILED) {
719 		perror("mmap");
720 		exit(1);
721 	}
722 
723 	memset(ring->rd, 0, ring->rd_len);
724 	for (i = 0; i < ring->rd_num; ++i) {
725 		ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
726 		ring->rd[i].iov_len = ring->flen;
727 	}
728 }
729 
bind_ring(int sock,struct ring * ring)730 static void bind_ring(int sock, struct ring *ring)
731 {
732 	int ret;
733 
734 	pair_udp_setfilter(sock);
735 
736 	ring->ll.sll_family = PF_PACKET;
737 	ring->ll.sll_protocol = htons(ETH_P_ALL);
738 	ring->ll.sll_ifindex = if_nametoindex("lo");
739 	ring->ll.sll_hatype = 0;
740 	ring->ll.sll_pkttype = 0;
741 	ring->ll.sll_halen = 0;
742 
743 	ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
744 	if (ret == -1) {
745 		perror("bind");
746 		exit(1);
747 	}
748 }
749 
walk_ring(int sock,struct ring * ring)750 static void walk_ring(int sock, struct ring *ring)
751 {
752 	ring->walk(sock, ring);
753 }
754 
unmap_ring(int sock,struct ring * ring)755 static void unmap_ring(int sock, struct ring *ring)
756 {
757 	munmap(ring->mm_space, ring->mm_len);
758 	free(ring->rd);
759 }
760 
/*
 * Estimate the kernel's pointer width in bits.
 *
 * Reads the beginning of /proc/kallsyms and counts the characters before
 * the first whitespace (presumably the hex address of the first symbol --
 * confirm against the procfs format), at four bits per character.
 *
 * Returns the estimated width. Exits with status 1 if the file cannot be
 * opened or read.
 */
static int test_kernel_bit_width(void)
{
	char in[512];
	ssize_t got, pos;
	int fd;

	fd = open("/proc/kallsyms", O_RDONLY);
	if (fd == -1) {
		perror("open");
		exit(1);
	}

	got = read(fd, in, sizeof(in));
	if (got <= 0) {
		perror("read");
		close(fd);
		exit(1);
	}

	close(fd);

	/*
	 * read() does not NUL-terminate, so the scan is limited to the bytes
	 * actually read. isspace() needs a value representable as unsigned
	 * char, hence the cast.
	 */
	for (pos = 0; pos < got; pos++) {
		if (isspace((unsigned char) in[pos]))
			break;
	}

	return (int) pos * 4;
}
789 
/* Return the user-space word size in bits, as given by __WORDSIZE. */
static int test_user_bit_width(void)
{
	const int width = __WORDSIZE;

	return width;
}
794 
/* Printable names of the TPACKET versions, indexed by version constant. */
static const char *tpacket_str[] = {
	[TPACKET_V1] = "TPACKET_V1",
	[TPACKET_V2] = "TPACKET_V2",
	[TPACKET_V3] = "TPACKET_V3",
};

/* Printable names of the ring types, indexed by socket-option constant. */
static const char *type_str[] = {
	[PACKET_RX_RING] = "PACKET_RX_RING",
	[PACKET_TX_RING] = "PACKET_TX_RING",
};
805 
test_tpacket(int version,int type)806 static int test_tpacket(int version, int type)
807 {
808 	int sock;
809 	struct ring ring;
810 
811 	fprintf(stderr, "test: %s with %s ", tpacket_str[version],
812 		type_str[type]);
813 	fflush(stderr);
814 
815 	if (version == TPACKET_V1 &&
816 	    test_kernel_bit_width() != test_user_bit_width()) {
817 		fprintf(stderr, "test: skip %s %s since user and kernel "
818 			"space have different bit width\n",
819 			tpacket_str[version], type_str[type]);
820 		return KSFT_SKIP;
821 	}
822 
823 	sock = pfsocket(version);
824 	memset(&ring, 0, sizeof(ring));
825 	setup_ring(sock, &ring, version, type);
826 	mmap_ring(sock, &ring);
827 	bind_ring(sock, &ring);
828 	walk_ring(sock, &ring);
829 	unmap_ring(sock, &ring);
830 	close(sock);
831 
832 	fprintf(stderr, "\n");
833 	return 0;
834 }
835 
/*
 * Parse the running kernel's release string.
 *
 * @version:    receives the first number of the release
 * @patchlevel: receives the second number of the release
 *
 * The release must start with three dot-separated numbers; the third is
 * parsed but not returned. Exits with status 1 if uname() fails or the
 * release string does not have that form.
 */
void get_kernel_version(int *version, int *patchlevel)
{
	int ret, sublevel;
	struct utsname utsname;

	ret = uname(&utsname);
	if (ret) {
		perror("uname");
		exit(1);
	}

	ret = sscanf(utsname.release, "%d.%d.%d", version, patchlevel,
		     &sublevel);
	if (ret < 0) {
		perror("sscanf");
		exit(1);
	} else if (ret != 3) {
		/* %s takes a char *: pass the array, not its address. */
		printf("Malformed kernel version %s\n", utsname.release);
		exit(1);
	}
}
857 
/*
 * Run every supported version / ring-type combination.
 *
 * The TPACKET_V3 TX test only runs on kernel 4.11 or later. Any non-zero
 * result from a test (including a skip) makes the program return 1;
 * otherwise it prints a success line and returns 0.
 */
int main(void)
{
	int major, minor;
	int failed = 0;
	int have_v3_tx;

	get_kernel_version(&major, &minor);
	have_v3_tx = major > 4 || (major == 4 && minor >= 11);

	failed |= test_tpacket(TPACKET_V1, PACKET_RX_RING);
	failed |= test_tpacket(TPACKET_V1, PACKET_TX_RING);

	failed |= test_tpacket(TPACKET_V2, PACKET_RX_RING);
	failed |= test_tpacket(TPACKET_V2, PACKET_TX_RING);

	failed |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
	if (have_v3_tx)
		failed |= test_tpacket(TPACKET_V3, PACKET_TX_RING);

	if (failed)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}
881