• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Cloudflare Ltd.
3 // Copyright (c) 2020 Isovalent, Inc.
4 
5 #include <stddef.h>
6 #include <stdbool.h>
7 #include <string.h>
8 #include <linux/bpf.h>
9 #include <linux/if_ether.h>
10 #include <linux/in.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/pkt_cls.h>
14 #include <linux/tcp.h>
15 #include <sys/socket.h>
16 #include <bpf/bpf_helpers.h>
17 #include <bpf/bpf_endian.h>
18 
19 #if defined(IPROUTE2_HAVE_LIBBPF)
20 /* Use a new-style map definition. */
21 struct {
22 	__uint(type, BPF_MAP_TYPE_SOCKMAP);
23 	__type(key, int);
24 	__type(value, __u64);
25 	__uint(pinning, LIBBPF_PIN_BY_NAME);
26 	__uint(max_entries, 1);
27 } server_map SEC(".maps");
28 #else
29 /* Pin map under /sys/fs/bpf/tc/globals/<map name> */
30 #define PIN_GLOBAL_NS 2
31 
32 /* Must match struct bpf_elf_map layout from iproute2 */
33 struct {
34 	__u32 type;
35 	__u32 size_key;
36 	__u32 size_value;
37 	__u32 max_elem;
38 	__u32 flags;
39 	__u32 id;
40 	__u32 pinning;
41 } server_map SEC("maps") = {
42 	.type = BPF_MAP_TYPE_SOCKMAP,
43 	.size_key = sizeof(int),
44 	.size_value  = sizeof(__u64),
45 	.max_elem = 1,
46 	.pinning = PIN_GLOBAL_NS,
47 };
48 #endif
49 
50 int _version SEC("version") = 1;
51 char _license[] SEC("license") = "GPL";
52 
53 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
54 static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff * skb,bool * ipv4,bool * tcp)55 get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
56 {
57 	void *data_end = (void *)(long)skb->data_end;
58 	void *data = (void *)(long)skb->data;
59 	struct bpf_sock_tuple *result;
60 	struct ethhdr *eth;
61 	__u64 tuple_len;
62 	__u8 proto = 0;
63 	__u64 ihl_len;
64 
65 	eth = (struct ethhdr *)(data);
66 	if (eth + 1 > data_end)
67 		return NULL;
68 
69 	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
70 		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
71 
72 		if (iph + 1 > data_end)
73 			return NULL;
74 		if (iph->ihl != 5)
75 			/* Options are not supported */
76 			return NULL;
77 		ihl_len = iph->ihl * 4;
78 		proto = iph->protocol;
79 		*ipv4 = true;
80 		result = (struct bpf_sock_tuple *)&iph->saddr;
81 	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
82 		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
83 
84 		if (ip6h + 1 > data_end)
85 			return NULL;
86 		ihl_len = sizeof(*ip6h);
87 		proto = ip6h->nexthdr;
88 		*ipv4 = false;
89 		result = (struct bpf_sock_tuple *)&ip6h->saddr;
90 	} else {
91 		return (struct bpf_sock_tuple *)data;
92 	}
93 
94 	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
95 		return NULL;
96 
97 	*tcp = (proto == IPPROTO_TCP);
98 	return result;
99 }
100 
101 static inline int
handle_udp(struct __sk_buff * skb,struct bpf_sock_tuple * tuple,bool ipv4)102 handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
103 {
104 	struct bpf_sock_tuple ln = {0};
105 	struct bpf_sock *sk;
106 	const int zero = 0;
107 	size_t tuple_len;
108 	__be16 dport;
109 	int ret;
110 
111 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
112 	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
113 		return TC_ACT_SHOT;
114 
115 	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
116 	if (sk)
117 		goto assign;
118 
119 	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
120 	if (dport != bpf_htons(4321))
121 		return TC_ACT_OK;
122 
123 	sk = bpf_map_lookup_elem(&server_map, &zero);
124 	if (!sk)
125 		return TC_ACT_SHOT;
126 
127 assign:
128 	ret = bpf_sk_assign(skb, sk, 0);
129 	bpf_sk_release(sk);
130 	return ret;
131 }
132 
133 static inline int
handle_tcp(struct __sk_buff * skb,struct bpf_sock_tuple * tuple,bool ipv4)134 handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
135 {
136 	struct bpf_sock_tuple ln = {0};
137 	struct bpf_sock *sk;
138 	const int zero = 0;
139 	size_t tuple_len;
140 	__be16 dport;
141 	int ret;
142 
143 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
144 	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
145 		return TC_ACT_SHOT;
146 
147 	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
148 	if (sk) {
149 		if (sk->state != BPF_TCP_LISTEN)
150 			goto assign;
151 		bpf_sk_release(sk);
152 	}
153 
154 	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
155 	if (dport != bpf_htons(4321))
156 		return TC_ACT_OK;
157 
158 	sk = bpf_map_lookup_elem(&server_map, &zero);
159 	if (!sk)
160 		return TC_ACT_SHOT;
161 
162 	if (sk->state != BPF_TCP_LISTEN) {
163 		bpf_sk_release(sk);
164 		return TC_ACT_SHOT;
165 	}
166 
167 assign:
168 	ret = bpf_sk_assign(skb, sk, 0);
169 	bpf_sk_release(sk);
170 	return ret;
171 }
172 
173 SEC("classifier/sk_assign_test")
bpf_sk_assign_test(struct __sk_buff * skb)174 int bpf_sk_assign_test(struct __sk_buff *skb)
175 {
176 	struct bpf_sock_tuple *tuple, ln = {0};
177 	bool ipv4 = false;
178 	bool tcp = false;
179 	int tuple_len;
180 	int ret = 0;
181 
182 	tuple = get_tuple(skb, &ipv4, &tcp);
183 	if (!tuple)
184 		return TC_ACT_SHOT;
185 
186 	/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
187 	 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
188 	 * the same here, if we try to share the implementations they will
189 	 * fail to verify because we're crossing pointer types.
190 	 */
191 	if (tcp)
192 		ret = handle_tcp(skb, tuple, ipv4);
193 	else
194 		ret = handle_udp(skb, tuple, ipv4);
195 
196 	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
197 }
198