1 // SPDX-License-Identifier: GPL-2.0
2 #include <limits.h>
3 #include <stddef.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <linux/pkt_cls.h>
7 #include <linux/bpf.h>
8 #include <linux/in.h>
9 #include <linux/if_ether.h>
10 #include <linux/icmp.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/tcp.h>
14 #include <linux/udp.h>
15 #include <linux/if_packet.h>
16 #include <sys/socket.h>
17 #include <linux/if_tunnel.h>
18 #include <linux/mpls.h>
19 #include <bpf/bpf_helpers.h>
20 #include <bpf/bpf_endian.h>
21
/* PROG(F) opens the definition of a "flow_dissector" section program named
 * flow_dissector_<F>. F is macro-expanded before it is pasted, so PROG(IP)
 * defines flow_dissector_0, PROG(IPV6) defines flow_dissector_1, and so on.
 * The NAME argument of PROG_() is not used in the expansion.
 */
#define PROG(F) PROG_(F, _##F)
#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM

/* These are the identifiers of the BPF programs that will be used in tail
 * calls. Name is limited to 16 characters, with the terminating character and
 * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
 *
 * NOTE(review): PROG_() pastes onto "flow_dissector_", not "bpf_func_", so the
 * length arithmetic above looks stale -- confirm.
 *
 * The same values are used as slot numbers when tail-calling via jmp_table.
 */
#define IP 0
#define IPV6 1
#define IPV6OP 2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
#define IPV6FR 3 /* Fragmentation IPv6 Extension Header */
#define MPLS 4
#define VLAN 5
#define MAX_PROG 6 /* max_entries of jmp_table */

/* Masks applied to iphdr.frag_off after bpf_htons(): the "more fragments"
 * flag and the fragment offset field.
 */
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
/* Masks for frag_hdr.frag_off (host-order values, converted with bpf_htons()
 * where used). IP6_MF is not referenced in the code visible here.
 */
#define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8
41
/* One VLAN tag as read at the current dissection offset by PROG(VLAN):
 * a 16-bit TCI word followed by the ethertype of the encapsulated payload.
 * Both fields are big-endian.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;			/* not inspected by this file */
	__be16 h_vlan_encapsulated_proto;	/* ethertype that follows the tag */
};
46
/* Fixed leading part of a GRE header. The optional checksum, key and
 * sequence words that may follow are skipped by parse_ip_proto() according
 * to the bits set in @flags.
 */
struct gre_hdr {
	__be16 flags;	/* tested with GRE_VERSION and GRE_IS_CSUM/KEY/SEQ */
	__be16 proto;	/* ethertype of the payload (ETH_P_TEB for Ethernet) */
};
51
/* IPv6 fragment extension header as parsed by PROG(IPV6FR). */
struct frag_hdr {
	__u8 nexthdr;		/* protocol of the header following this one */
	__u8 reserved;
	__be16 frag_off;	/* masked with IP6_OFFSET to detect non-first fragments */
	__be32 identification;	/* not inspected by this file */
};
58
/* Program array used for tail calls between the per-protocol dissectors.
 * Slots are addressed with the IP..VLAN identifiers and the table holds
 * MAX_PROG entries. Nothing visible in this file fills the slots; presumably
 * the loader does -- verify against the user-space side.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, MAX_PROG);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
65
/* Hash map into which export_flow_keys() stores a copy of the flow keys of
 * every dissection it finishes. The __u32 key is built from the source and
 * destination ports (sport in the upper 16 bits, dport in the lower 16).
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");
72
export_flow_keys(struct bpf_flow_keys * keys,int ret)73 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
74 int ret)
75 {
76 __u32 key = (__u32)(keys->sport) << 16 | keys->dport;
77 struct bpf_flow_keys val;
78
79 memcpy(&val, keys, sizeof(val));
80 bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
81 return ret;
82 }
83
84 #define IPV6_FLOWLABEL_MASK __bpf_constant_htonl(0x000FFFFF)
ip6_flowlabel(const struct ipv6hdr * hdr)85 static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
86 {
87 return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
88 }
89
/* Gives access to @hdr_size bytes of packet located at the current transport
 * offset (skb->flow_keys->thoff).
 *
 * When those bytes lie entirely between skb->data and skb->data_end, a pointer
 * straight into the packet is returned and @buffer is left untouched.
 * Otherwise the bytes are copied into @buffer with bpf_skb_load_bytes() and
 * @buffer is returned.
 *
 * Returns NULL when thoff + @hdr_size would exceed USHRT_MAX or when the copy
 * fails.
 */
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
							 __u16 hdr_size,
							 void *buffer)
{
	__u16 off = skb->flow_keys->thoff;
	void *pkt_end = (void *)(long)skb->data_end;
	__u8 *pkt = (__u8 *)(long)skb->data;
	__u8 *hdr;

	/* Reject offsets for which off + hdr_size would not fit in 16 bits */
	if (off > (USHRT_MAX - hdr_size))
		return NULL;

	hdr = pkt + off;
	if (hdr + hdr_size > pkt_end) {
		/* Not directly accessible: fall back to copying the bytes out */
		if (bpf_skb_load_bytes(skb, off, buffer, hdr_size))
			return NULL;
		return buffer;
	}

	return hdr;
}
112
113 /* Dispatches on ETHERTYPE */
parse_eth_proto(struct __sk_buff * skb,__be16 proto)114 static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
115 {
116 struct bpf_flow_keys *keys = skb->flow_keys;
117
118 switch (proto) {
119 case bpf_htons(ETH_P_IP):
120 bpf_tail_call_static(skb, &jmp_table, IP);
121 break;
122 case bpf_htons(ETH_P_IPV6):
123 bpf_tail_call_static(skb, &jmp_table, IPV6);
124 break;
125 case bpf_htons(ETH_P_MPLS_MC):
126 case bpf_htons(ETH_P_MPLS_UC):
127 bpf_tail_call_static(skb, &jmp_table, MPLS);
128 break;
129 case bpf_htons(ETH_P_8021Q):
130 case bpf_htons(ETH_P_8021AD):
131 bpf_tail_call_static(skb, &jmp_table, VLAN);
132 break;
133 default:
134 /* Protocol not supported */
135 return export_flow_keys(keys, BPF_DROP);
136 }
137
138 return export_flow_keys(keys, BPF_DROP);
139 }
140
141 SEC("flow_dissector")
_dissect(struct __sk_buff * skb)142 int _dissect(struct __sk_buff *skb)
143 {
144 struct bpf_flow_keys *keys = skb->flow_keys;
145
146 return parse_eth_proto(skb, keys->n_proto);
147 }
148
149 /* Parses on IPPROTO_* */
parse_ip_proto(struct __sk_buff * skb,__u8 proto)150 static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
151 {
152 struct bpf_flow_keys *keys = skb->flow_keys;
153 void *data_end = (void *)(long)skb->data_end;
154 struct icmphdr *icmp, _icmp;
155 struct gre_hdr *gre, _gre;
156 struct ethhdr *eth, _eth;
157 struct tcphdr *tcp, _tcp;
158 struct udphdr *udp, _udp;
159
160 switch (proto) {
161 case IPPROTO_ICMP:
162 icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
163 if (!icmp)
164 return export_flow_keys(keys, BPF_DROP);
165 return export_flow_keys(keys, BPF_OK);
166 case IPPROTO_IPIP:
167 keys->is_encap = true;
168 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
169 return export_flow_keys(keys, BPF_OK);
170
171 return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
172 case IPPROTO_IPV6:
173 keys->is_encap = true;
174 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
175 return export_flow_keys(keys, BPF_OK);
176
177 return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
178 case IPPROTO_GRE:
179 gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
180 if (!gre)
181 return export_flow_keys(keys, BPF_DROP);
182
183 if (bpf_htons(gre->flags & GRE_VERSION))
184 /* Only inspect standard GRE packets with version 0 */
185 return export_flow_keys(keys, BPF_OK);
186
187 keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
188 if (GRE_IS_CSUM(gre->flags))
189 keys->thoff += 4; /* Step over chksum and Padding */
190 if (GRE_IS_KEY(gre->flags))
191 keys->thoff += 4; /* Step over key */
192 if (GRE_IS_SEQ(gre->flags))
193 keys->thoff += 4; /* Step over sequence number */
194
195 keys->is_encap = true;
196 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
197 return export_flow_keys(keys, BPF_OK);
198
199 if (gre->proto == bpf_htons(ETH_P_TEB)) {
200 eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
201 &_eth);
202 if (!eth)
203 return export_flow_keys(keys, BPF_DROP);
204
205 keys->thoff += sizeof(*eth);
206
207 return parse_eth_proto(skb, eth->h_proto);
208 } else {
209 return parse_eth_proto(skb, gre->proto);
210 }
211 case IPPROTO_TCP:
212 tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
213 if (!tcp)
214 return export_flow_keys(keys, BPF_DROP);
215
216 if (tcp->doff < 5)
217 return export_flow_keys(keys, BPF_DROP);
218
219 if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
220 return export_flow_keys(keys, BPF_DROP);
221
222 keys->sport = tcp->source;
223 keys->dport = tcp->dest;
224 return export_flow_keys(keys, BPF_OK);
225 case IPPROTO_UDP:
226 case IPPROTO_UDPLITE:
227 udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
228 if (!udp)
229 return export_flow_keys(keys, BPF_DROP);
230
231 keys->sport = udp->source;
232 keys->dport = udp->dest;
233 return export_flow_keys(keys, BPF_OK);
234 default:
235 return export_flow_keys(keys, BPF_DROP);
236 }
237
238 return export_flow_keys(keys, BPF_DROP);
239 }
240
parse_ipv6_proto(struct __sk_buff * skb,__u8 nexthdr)241 static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
242 {
243 struct bpf_flow_keys *keys = skb->flow_keys;
244
245 switch (nexthdr) {
246 case IPPROTO_HOPOPTS:
247 case IPPROTO_DSTOPTS:
248 bpf_tail_call_static(skb, &jmp_table, IPV6OP);
249 break;
250 case IPPROTO_FRAGMENT:
251 bpf_tail_call_static(skb, &jmp_table, IPV6FR);
252 break;
253 default:
254 return parse_ip_proto(skb, nexthdr);
255 }
256
257 return export_flow_keys(keys, BPF_DROP);
258 }
259
PROG(IP)260 PROG(IP)(struct __sk_buff *skb)
261 {
262 void *data_end = (void *)(long)skb->data_end;
263 struct bpf_flow_keys *keys = skb->flow_keys;
264 void *data = (void *)(long)skb->data;
265 struct iphdr *iph, _iph;
266 bool done = false;
267
268 iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
269 if (!iph)
270 return export_flow_keys(keys, BPF_DROP);
271
272 /* IP header cannot be smaller than 20 bytes */
273 if (iph->ihl < 5)
274 return export_flow_keys(keys, BPF_DROP);
275
276 keys->addr_proto = ETH_P_IP;
277 keys->ipv4_src = iph->saddr;
278 keys->ipv4_dst = iph->daddr;
279 keys->ip_proto = iph->protocol;
280
281 keys->thoff += iph->ihl << 2;
282 if (data + keys->thoff > data_end)
283 return export_flow_keys(keys, BPF_DROP);
284
285 if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
286 keys->is_frag = true;
287 if (iph->frag_off & bpf_htons(IP_OFFSET)) {
288 /* From second fragment on, packets do not have headers
289 * we can parse.
290 */
291 done = true;
292 } else {
293 keys->is_first_frag = true;
294 /* No need to parse fragmented packet unless
295 * explicitly asked for.
296 */
297 if (!(keys->flags &
298 BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
299 done = true;
300 }
301 }
302
303 if (done)
304 return export_flow_keys(keys, BPF_OK);
305
306 return parse_ip_proto(skb, iph->protocol);
307 }
308
PROG(IPV6)309 PROG(IPV6)(struct __sk_buff *skb)
310 {
311 struct bpf_flow_keys *keys = skb->flow_keys;
312 struct ipv6hdr *ip6h, _ip6h;
313
314 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
315 if (!ip6h)
316 return export_flow_keys(keys, BPF_DROP);
317
318 keys->addr_proto = ETH_P_IPV6;
319 memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
320
321 keys->thoff += sizeof(struct ipv6hdr);
322 keys->ip_proto = ip6h->nexthdr;
323 keys->flow_label = ip6_flowlabel(ip6h);
324
325 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
326 return export_flow_keys(keys, BPF_OK);
327
328 return parse_ipv6_proto(skb, ip6h->nexthdr);
329 }
330
PROG(IPV6OP)331 PROG(IPV6OP)(struct __sk_buff *skb)
332 {
333 struct bpf_flow_keys *keys = skb->flow_keys;
334 struct ipv6_opt_hdr *ip6h, _ip6h;
335
336 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
337 if (!ip6h)
338 return export_flow_keys(keys, BPF_DROP);
339
340 /* hlen is in 8-octets and does not include the first 8 bytes
341 * of the header
342 */
343 keys->thoff += (1 + ip6h->hdrlen) << 3;
344 keys->ip_proto = ip6h->nexthdr;
345
346 return parse_ipv6_proto(skb, ip6h->nexthdr);
347 }
348
PROG(IPV6FR)349 PROG(IPV6FR)(struct __sk_buff *skb)
350 {
351 struct bpf_flow_keys *keys = skb->flow_keys;
352 struct frag_hdr *fragh, _fragh;
353
354 fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
355 if (!fragh)
356 return export_flow_keys(keys, BPF_DROP);
357
358 keys->thoff += sizeof(*fragh);
359 keys->is_frag = true;
360 keys->ip_proto = fragh->nexthdr;
361
362 if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
363 keys->is_first_frag = true;
364
365 /* No need to parse fragmented packet unless
366 * explicitly asked for.
367 */
368 if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
369 return export_flow_keys(keys, BPF_OK);
370 } else {
371 return export_flow_keys(keys, BPF_OK);
372 }
373
374 return parse_ipv6_proto(skb, fragh->nexthdr);
375 }
376
PROG(MPLS)377 PROG(MPLS)(struct __sk_buff *skb)
378 {
379 struct bpf_flow_keys *keys = skb->flow_keys;
380 struct mpls_label *mpls, _mpls;
381
382 mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
383 if (!mpls)
384 return export_flow_keys(keys, BPF_DROP);
385
386 return export_flow_keys(keys, BPF_OK);
387 }
388
PROG(VLAN)389 PROG(VLAN)(struct __sk_buff *skb)
390 {
391 struct bpf_flow_keys *keys = skb->flow_keys;
392 struct vlan_hdr *vlan, _vlan;
393
394 /* Account for double-tagging */
395 if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
396 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
397 if (!vlan)
398 return export_flow_keys(keys, BPF_DROP);
399
400 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
401 return export_flow_keys(keys, BPF_DROP);
402
403 keys->nhoff += sizeof(*vlan);
404 keys->thoff += sizeof(*vlan);
405 }
406
407 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
408 if (!vlan)
409 return export_flow_keys(keys, BPF_DROP);
410
411 keys->nhoff += sizeof(*vlan);
412 keys->thoff += sizeof(*vlan);
413 /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
414 if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
415 vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
416 return export_flow_keys(keys, BPF_DROP);
417
418 keys->n_proto = vlan->h_vlan_encapsulated_proto;
419 return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
420 }
421
422 char __license[] SEC("license") = "GPL";
423