/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "hash_func01.h"

#define MAX_CPUS NR_CPUS

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
	__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
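
/* Note: cpu_map entries are filled in by userspace before any redirect can
 * succeed.  A minimal sketch of the expected setup (illustrative variable
 * names and queue size, not part of this file):
 *
 *	struct bpf_cpumap_val val = { .qsize = 2048 };
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &val, 0);
 *
 * struct bpf_cpumap_val can also carry a second XDP program (bpf_prog.fd)
 * that runs on the remote CPU after the frame has been dequeued there.
 */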

/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
	__u64 xdp_pass;
	__u64 xdp_drop;
	__u64 xdp_redirect;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback.  Redirect TX errors can be caught via a tracepoint.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} rx_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 2);
	/* TODO: have entries for all possible errno's */
} redirect_err_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, MAX_CPUS);
} cpumap_enqueue_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} cpumap_kthread_cnt SEC(".maps");

/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
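
/* Layout used by the programs below: cpus_available[0..cpus_count-1] holds
 * the actual CPU ids that frames may be redirected to (populated by
 * userspace), cpus_count holds how many of those slots are valid, and
 * cpus_iterator is per-CPU scratch state for the round-robin program.
 */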

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} exception_cnt SEC(".maps");

/* Helper parse functions */

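/* All helpers below bounds-check each header against data_end before
 * reading it; the BPF verifier rejects programs that access packet data
 * without such checks.  Returning 0 / false on short or non-matching
 * packets keeps the callers simple.
 */
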
/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or unsupported ether-type
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

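/* The XDP programs below return XDP_ABORTED when a map lookup fails or the
 * destination CPU is out of range.  XDP_ABORTED is reported by drivers via
 * the xdp:xdp_exception tracepoint, which is counted into exception_cnt by
 * the trace_xdp_exception program at the end of this file.
 */
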
SEC("xdp_cpu_map0")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map1_touch_data")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

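/* Round-robin scheme: cpus_iterator is a per-CPU array, so each RX CPU
 * advances its own private counter.  The rotation is therefore round-robin
 * per RX CPU rather than globally, which avoids an atomic increment in the
 * fast path.
 */
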
SEC("xdp_cpu_map2_round_robin")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 cpu_dest;
	u32 *cpu_lookup;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map3_proto_separate")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			if (rec)
				rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU.
 */
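/* Example: for a flow between 192.0.2.1 and 192.0.2.2 (illustrative
 * addresses), both directions hash the same sum saddr + daddr, since
 * addition is commutative, so request and reply traffic land on the same
 * remote CPU.
 */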
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12  size:4; signed:0;
	int ifindex;	//	offset:16  size:4; signed:1;
	int err;	//	offset:20  size:4; signed:1;
	int to_ifindex;	//	offset:24  size:4; signed:1;
	u32 map_id;	//	offset:28  size:4; signed:0;
	int map_index;	//	offset:32  size:4; signed:1;
};			//	offset:36
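
/* Note: this struct mirrors the tracepoint's format file by hand; if the
 * layout in the running kernel's format file differs, the offsets above
 * must be updated to match before relying on these fields.
 */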

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well
	 * in practice, as stopping perf-record also unloads this
	 * bpf_prog.  Plus, there is additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int to_cpu;		//	offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations via
	 * this tracepoint.  This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * this overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;			// First 8 bytes are not accessible
	int map_id;			//	offset:8;  size:4; signed:1;
	u32 act;			//	offset:12; size:4; signed:0;
	int cpu;			//	offset:16; size:4; signed:1;
	unsigned int drops;		//	offset:20; size:4; signed:0;
	unsigned int processed;		//	offset:24; size:4; signed:0;
	int sched;			//	offset:28; size:4; signed:1;
	unsigned int xdp_pass;		//	offset:32; size:4; signed:0;
	unsigned int xdp_drop;		//	offset:36; size:4; signed:0;
	unsigned int xdp_redirect;	//	offset:40; size:4; signed:0;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;
	rec->xdp_pass  += ctx->xdp_pass;
	rec->xdp_drop  += ctx->xdp_drop;
	rec->xdp_redirect  += ctx->xdp_redirect;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}
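
/* Userspace side: the companion loader in samples/bpf
 * (xdp_redirect_cpu_user.c) attaches one of the xdp_cpu_map* programs
 * above, populates cpu_map, cpus_available and cpus_count, and
 * periodically reads the per-CPU stats maps defined in this file to
 * report packet rates.
 */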