/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
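
/* Note: max_entries is deliberately left unset here; the matching userspace
 * loader is expected to size the map (typically to the number of possible
 * CPUs) before loading. Each value is a struct bpf_cpumap_val: qsize sets the
 * per-CPU kthread queue length, and bpf_prog.fd may point at one of the
 * SEC("xdp/cpumap") programs further down. A minimal userspace sketch,
 * illustrative only (`skel` and the chosen values are assumptions):
 *
 *	struct bpf_cpumap_val val = {
 *		.qsize	     = 2048,
 *		.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_cpu_pass),
 *	};
 *	u32 cpu = 2;
 *	bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map), &cpu, &val, 0);
 */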

/* Set of maps controlling available CPUs, and for iterating through the
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
} cpus_available SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
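
/* Roles, as used by the programs below: cpus_available maps an index 0..N-1
 * to an actual CPU number, cpus_count holds N, and cpus_iterator keeps the
 * per-RX-CPU round-robin position (a PERCPU_ARRAY, so each RX CPU advances
 * its own slot and no atomics are needed). Like cpu_map, cpus_available is
 * sized and populated by the userspace loader.
 */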

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(struct bpf_devmap_val));
	__uint(max_entries, 1);
} tx_port SEC(".maps");
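
/* tx_port holds a single egress ifindex (entry 0), written by userspace when
 * an egress device is configured; xdp_redirect_cpu_devmap below uses it to
 * bounce frames back out after they have been processed on the remote CPU.
 */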

char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = bpf_ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	return bpf_ntohs(udph->dest);
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp")
int  xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
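
/* Note on bpf_redirect_map(&cpu_map, cpu_dest, flags): on success it returns
 * XDP_REDIRECT and the frame is queued toward the target CPU's cpumap entry;
 * the low bits of flags select the return value when the map lookup fails,
 * so the flags == 0 used throughout this file means a missing or invalid
 * entry yields XDP_ABORTED.
 */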

SEC("xdp")
int  xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;
	u16 eth_type;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
		NO_TEAR_INC(rec->dropped);
		return XDP_DROP;
	}

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int  xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp")
int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;
	u16 dest_port;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			NO_TEAR_INC(rec->dropped);
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU, because the hash input is the commutative sum
 * saddr + daddr (with the L4 protocol folded into the seed).
 */
SEC("xdp")
int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key0 = 0;
	u32 *cpu_max;
	u32 cpu_hash;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

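/* Programs in SEC("xdp/cpumap") do not run at RX; they execute on the
 * destination CPU, on frames dequeued by the cpumap kthread, once their fd
 * has been installed in a cpu_map entry's bpf_cpumap_val.bpf_prog.fd.
 * Returning XDP_PASS there hands the frame to the network stack on that CPU,
 * XDP_DROP frees it, and XDP_REDIRECT can forward it again, as
 * xdp_redirect_cpu_devmap does via tx_port.
 */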
SEC("xdp/cpumap")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	swap_src_dst_mac(data);
	return bpf_redirect_map(&tx_port, 0, 0);
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
	return XDP_PASS;
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
	return XDP_DROP;
}

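/* SEC("xdp/devmap") programs attach via bpf_devmap_val.bpf_prog.fd and run on
 * the egress path, just before the frame is handed to the target device. Here
 * the program stamps the source MAC with tx_mac_addr, which userspace is
 * expected to fill in with the egress device's address.
 */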
SEC("xdp/devmap")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	__builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";