/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"

#define MAX_CPUS 64 /* WARNING - sync with _user.c */
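
/* The programs below are normally loaded and attached by the companion
 * user-space tool (xdp_redirect_cpu_user.c), which selects one prognum
 * section and fills the maps. A rough, illustrative invocation (check
 * the _user.c usage/help output for the exact option names):
 *
 *	./xdp_redirect_cpu --dev <ifname> --prognum 5 --cpu 2 --cpu 3
 */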

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
	__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
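
/* The key is the destination CPU id and the value is the queue size
 * for the cpumap kthread on that CPU. A minimal sketch of the
 * user-space side, assuming a libbpf map fd and an example qsize:
 *
 *	__u32 cpu = 2, qsize = 192;
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &qsize, 0);
 */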

/* Common stats data record to keep userspace simpler */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback. Redirect TX errors can be caught via a tracepoint.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} rx_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 2);
	/* TODO: have entries for all possible errno's */
} redirect_err_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, MAX_CPUS);
} cpumap_enqueue_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} cpumap_kthread_cnt SEC(".maps");

/* Set of maps controlling the available CPUs, and for iterating
 * through the selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
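
/* A rough sketch of how the user-space loader is expected to fill
 * these maps (the real code lives in xdp_redirect_cpu_user.c; the
 * fd and array names below are illustrative):
 *
 *	__u32 key0 = 0, n_cpus = 2, cpus[] = { 2, 3 };
 *
 *	for (__u32 i = 0; i < n_cpus; i++)
 *		bpf_map_update_elem(cpus_available_fd, &i, &cpus[i], 0);
 *	bpf_map_update_elem(cpus_count_fd, &key0, &n_cpus, 0);
 */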

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} exception_cnt SEC(".maps");

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or a non-supported ether-type
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 cpu_dest;
	u32 *cpu_lookup;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

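	/* Note: cpus_iterator is a per-CPU array, so each RX CPU keeps
	 * its own round-robin position and no atomic update is needed.
	 */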
	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
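		/* Note: get_dest_port_ipv4_udp() parses an IPv4 header,
		 * so this drop filter effectively only matches IPv4
		 * UDP port 9 traffic.
		 */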
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			if (rec)
				rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

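	/* Addition is commutative, so swapping saddr/daddr yields the
	 * same hash; this is what makes the load-balancing symmetric.
	 */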
	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric, meaning swapping IP src/dest still
 * hits the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;
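	/* cpus_count holds how many entries userspace loaded into
	 * cpus_available, so the modulo above keeps cpu_idx within
	 * the populated range.
	 */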

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int prog_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int ifindex;		// offset:16; size:4; signed:1;
	int err;		// offset:20; size:4; signed:1;
	int to_ifindex;		// offset:24; size:4; signed:1;
	u32 map_id;		// offset:28; size:4; signed:0;
	int map_index;		// offset:32; size:4; signed:1;
};				// offset:36

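/* These enum values double as the keys into redirect_err_cnt above,
 * which is why that map has max_entries == 2.
 */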
enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well
	 * in practice, as stopping perf-record also unloads this
	 * bpf_prog. Plus, there is additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int prog_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int ifindex;		// offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int cpu;		// offset:16; size:4; signed:1;
	unsigned int drops;	// offset:20; size:4; signed:0;
	unsigned int processed;	// offset:24; size:4; signed:0;
	int to_cpu;		// offset:28; size:4; signed:1;
};


SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;
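	/* "drops" at enqueue time means the destination CPU's cpumap
	 * queue (ptr_ring) was full.
	 */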

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;
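	/* (i.e. average bulk size = processed / issue) */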

	/* Inception: It's possible to detect overload situations via
	 * this tracepoint. This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * this overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int cpu;		// offset:16; size:4; signed:1;
	unsigned int drops;	// offset:20; size:4; signed:0;
	unsigned int processed;	// offset:24; size:4; signed:0;
	int sched;		// offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}
