/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"

#define MAX_CPUS 64 /* WARNING - sync with _user.c */

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
	__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
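/* Note: each cpumap value is written by userspace and sets the queue
 * size of the per-CPU kernel kthread that processes the redirected
 * frames on the destination CPU.  Roughly, the companion _user.c side
 * is expected to populate an entry like this (sketch, example names
 * and qsize only):
 *
 *	__u32 cpu = 2, qsize = 192;
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &qsize, 0);
 */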

/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback.  Redirect TX errors can be caught via a tracepoint.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} rx_cnt SEC(".maps");
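/* Note: as a PERCPU_ARRAY, every CPU increments its own copy of the
 * datarec without atomics; userspace must read all per-CPU values for
 * key 0 and sum them to get the global counters.
 */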

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 2);
	/* TODO: have entries for all possible errno's */
} redirect_err_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, MAX_CPUS);
} cpumap_enqueue_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} cpumap_kthread_cnt SEC(".maps");

/* Set of maps controlling available CPUs, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
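/* Note: the convention assumed here (and expected to be set up by the
 * companion _user.c program) is that cpus_available[0..n-1] hold the
 * destination CPU ids, cpus_count[0] holds n, and cpus_iterator is
 * per-CPU scratch state used by the round-robin program below.  The
 * maps themselves do not enforce this layout.
 */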

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} exception_cnt SEC(".maps");

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or a non-supported ether-type
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};
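/* (struct vlan_hdr is defined locally; the uapi if_vlan.h header
 * included above does not appear to provide it.)
 */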

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

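/* Extract the UDP destination port from an IPv4 packet.  Note: the UDP
 * header is located at (iph + 1), i.e. this assumes a 20-byte IPv4
 * header without IP options, and returns 0 for non-UDP or truncated
 * packets.
 */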
static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
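
/* Note: on success bpf_redirect_map() yields XDP_REDIRECT and the
 * frame is enqueued towards the cpumap kthread on cpu_dest; redirect
 * failures are visible through the xdp redirect tracepoints handled
 * further below.
 */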

SEC("xdp_cpu_map1_touch_data")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 cpu_dest;
	u32 *cpu_lookup;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
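
/* Note: cpus_iterator is a PERCPU array, so each RX CPU advances its
 * own position independently; the round-robin is per RX CPU rather
 * than global, and the non-atomic read-modify-write above is safe
 * because only the local CPU touches its copy of the iterator.
 */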

SEC("xdp_cpu_map3_proto_separate")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			if (rec)
				rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU.
 */
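/* (The symmetry follows from summing saddr + daddr before hashing:
 * addition commutes, so both directions of a flow produce the same
 * hash input and therefore land on the same CPU.)
 */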
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12  size:4; signed:0;
	int ifindex;	//	offset:16  size:4; signed:1;
	int err;	//	offset:20  size:4; signed:1;
	int to_ifindex;	//	offset:24  size:4; signed:1;
	u32 map_id;	//	offset:28  size:4; signed:0;
	int map_index;	//	offset:32  size:4; signed:1;
};			//	offset:36
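
/* Note: this struct mirrors the field offsets printed by the
 * tracepoint's format file; the same layout is reused by both the
 * xdp_redirect_err and xdp_redirect_map_err programs below.
 */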

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well
	 * in practice as stopping perf-record also unloads this
	 * bpf_prog.  Plus, there is additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int to_cpu;		//	offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations via
	 * this tracepoint.  This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * the overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int sched;		//	offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}
722