1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Cloudflare Ltd.
3 // Copyright (c) 2020 Isovalent, Inc.
4
5 #include <stddef.h>
6 #include <stdbool.h>
7 #include <string.h>
8 #include <linux/bpf.h>
9 #include <linux/if_ether.h>
10 #include <linux/in.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/pkt_cls.h>
14 #include <linux/tcp.h>
15 #include <sys/socket.h>
16 #include <bpf/bpf_helpers.h>
17 #include <bpf/bpf_endian.h>
18
19 /* Pin map under /sys/fs/bpf/tc/globals/<map name> */
20 #define PIN_GLOBAL_NS 2
21
22 /* Must match struct bpf_elf_map layout from iproute2 */
23 struct {
24 __u32 type;
25 __u32 size_key;
26 __u32 size_value;
27 __u32 max_elem;
28 __u32 flags;
29 __u32 id;
30 __u32 pinning;
31 } server_map SEC("maps") = {
32 .type = BPF_MAP_TYPE_SOCKMAP,
33 .size_key = sizeof(int),
34 .size_value = sizeof(__u64),
35 .max_elem = 1,
36 .pinning = PIN_GLOBAL_NS,
37 };
38
39 int _version SEC("version") = 1;
40 char _license[] SEC("license") = "GPL";
41
42 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
43 static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff * skb,bool * ipv4,bool * tcp)44 get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
45 {
46 void *data_end = (void *)(long)skb->data_end;
47 void *data = (void *)(long)skb->data;
48 struct bpf_sock_tuple *result;
49 struct ethhdr *eth;
50 __u64 tuple_len;
51 __u8 proto = 0;
52 __u64 ihl_len;
53
54 eth = (struct ethhdr *)(data);
55 if (eth + 1 > data_end)
56 return NULL;
57
58 if (eth->h_proto == bpf_htons(ETH_P_IP)) {
59 struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
60
61 if (iph + 1 > data_end)
62 return NULL;
63 if (iph->ihl != 5)
64 /* Options are not supported */
65 return NULL;
66 ihl_len = iph->ihl * 4;
67 proto = iph->protocol;
68 *ipv4 = true;
69 result = (struct bpf_sock_tuple *)&iph->saddr;
70 } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
71 struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
72
73 if (ip6h + 1 > data_end)
74 return NULL;
75 ihl_len = sizeof(*ip6h);
76 proto = ip6h->nexthdr;
77 *ipv4 = false;
78 result = (struct bpf_sock_tuple *)&ip6h->saddr;
79 } else {
80 return (struct bpf_sock_tuple *)data;
81 }
82
83 if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
84 return NULL;
85
86 *tcp = (proto == IPPROTO_TCP);
87 return result;
88 }
89
90 static inline int
handle_udp(struct __sk_buff * skb,struct bpf_sock_tuple * tuple,bool ipv4)91 handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
92 {
93 struct bpf_sock_tuple ln = {0};
94 struct bpf_sock *sk;
95 const int zero = 0;
96 size_t tuple_len;
97 __be16 dport;
98 int ret;
99
100 tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
101 if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
102 return TC_ACT_SHOT;
103
104 sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
105 if (sk)
106 goto assign;
107
108 dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
109 if (dport != bpf_htons(4321))
110 return TC_ACT_OK;
111
112 sk = bpf_map_lookup_elem(&server_map, &zero);
113 if (!sk)
114 return TC_ACT_SHOT;
115
116 assign:
117 ret = bpf_sk_assign(skb, sk, 0);
118 bpf_sk_release(sk);
119 return ret;
120 }
121
122 static inline int
handle_tcp(struct __sk_buff * skb,struct bpf_sock_tuple * tuple,bool ipv4)123 handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
124 {
125 struct bpf_sock_tuple ln = {0};
126 struct bpf_sock *sk;
127 const int zero = 0;
128 size_t tuple_len;
129 __be16 dport;
130 int ret;
131
132 tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
133 if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
134 return TC_ACT_SHOT;
135
136 sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
137 if (sk) {
138 if (sk->state != BPF_TCP_LISTEN)
139 goto assign;
140 bpf_sk_release(sk);
141 }
142
143 dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
144 if (dport != bpf_htons(4321))
145 return TC_ACT_OK;
146
147 sk = bpf_map_lookup_elem(&server_map, &zero);
148 if (!sk)
149 return TC_ACT_SHOT;
150
151 if (sk->state != BPF_TCP_LISTEN) {
152 bpf_sk_release(sk);
153 return TC_ACT_SHOT;
154 }
155
156 assign:
157 ret = bpf_sk_assign(skb, sk, 0);
158 bpf_sk_release(sk);
159 return ret;
160 }
161
162 SEC("classifier/sk_assign_test")
bpf_sk_assign_test(struct __sk_buff * skb)163 int bpf_sk_assign_test(struct __sk_buff *skb)
164 {
165 struct bpf_sock_tuple *tuple, ln = {0};
166 bool ipv4 = false;
167 bool tcp = false;
168 int tuple_len;
169 int ret = 0;
170
171 tuple = get_tuple(skb, &ipv4, &tcp);
172 if (!tuple)
173 return TC_ACT_SHOT;
174
175 /* Note that the verifier socket return type for bpf_skc_lookup_tcp()
176 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
177 * the same here, if we try to share the implementations they will
178 * fail to verify because we're crossing pointer types.
179 */
180 if (tcp)
181 ret = handle_tcp(skb, tuple, ipv4);
182 else
183 ret = handle_udp(skb, tuple, ipv4);
184
185 return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
186 }
187