1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <linux/tcp.h>
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <bpf/bpf_helpers.h>
7
/* License string emitted into the "license" section of the object. */
char _license[] SEC("license") = "GPL";

/* Page size as seen by the test; the programs below compare it against
 * the length of the optval window they are given (optval_end - optval)
 * and against the original optlen.
 */
int page_size = 0; /* userspace should set it */

/* Fall back to IPPROTO_TCP when the included headers do not provide
 * SOL_TCP.
 */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif

/* Socket option level that _getsockopt/_setsockopt serve themselves from
 * socket storage; every other level not special-cased there is denied.
 */
#define SOL_CUSTOM 0xdeadbeef
17
/* Value type of socket_storage_map. */
struct sockopt_sk {
	/* Byte written by _setsockopt (from optval[0]) and handed back by
	 * _getsockopt for the SOL_CUSTOM level.
	 */
	__u8 val;
};
21
/* BPF_MAP_TYPE_SK_STORAGE map with struct sockopt_sk values.
 * _getsockopt and _setsockopt look entries up with ctx->sk and pass
 * BPF_SK_STORAGE_GET_F_CREATE to bpf_sk_storage_get().
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");
28
29 SEC("cgroup/getsockopt")
_getsockopt(struct bpf_sockopt * ctx)30 int _getsockopt(struct bpf_sockopt *ctx)
31 {
32 __u8 *optval_end = ctx->optval_end;
33 __u8 *optval = ctx->optval;
34 struct sockopt_sk *storage;
35 struct bpf_sock *sk;
36
37 /* Bypass AF_NETLINK. */
38 sk = ctx->sk;
39 if (sk && sk->family == AF_NETLINK)
40 return 1;
41
42 /* Make sure bpf_get_netns_cookie is callable.
43 */
44 if (bpf_get_netns_cookie(NULL) == 0)
45 return 0;
46
47 if (bpf_get_netns_cookie(ctx) == 0)
48 return 0;
49
50 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
51 /* Not interested in SOL_IP:IP_TOS;
52 * let next BPF program in the cgroup chain or kernel
53 * handle it.
54 */
55 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
56 return 1;
57 }
58
59 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
60 /* Not interested in SOL_SOCKET:SO_SNDBUF;
61 * let next BPF program in the cgroup chain or kernel
62 * handle it.
63 */
64 return 1;
65 }
66
67 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
68 /* Not interested in SOL_TCP:TCP_CONGESTION;
69 * let next BPF program in the cgroup chain or kernel
70 * handle it.
71 */
72 return 1;
73 }
74
75 if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
76 /* Verify that TCP_ZEROCOPY_RECEIVE triggers.
77 * It has a custom implementation for performance
78 * reasons.
79 */
80
81 /* Check that optval contains address (__u64) */
82 if (optval + sizeof(__u64) > optval_end)
83 return 0; /* bounds check */
84
85 if (((struct tcp_zerocopy_receive *)optval)->address != 0)
86 return 0; /* unexpected data */
87
88 return 1;
89 }
90
91 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
92 if (optval + 1 > optval_end)
93 return 0; /* bounds check */
94
95 ctx->retval = 0; /* Reset system call return value to zero */
96
97 /* Always export 0x55 */
98 optval[0] = 0x55;
99 ctx->optlen = 1;
100
101 /* Userspace buffer is PAGE_SIZE * 2, but BPF
102 * program can only see the first PAGE_SIZE
103 * bytes of data.
104 */
105 if (optval_end - optval != page_size)
106 return 0; /* unexpected data size */
107
108 return 1;
109 }
110
111 if (ctx->level != SOL_CUSTOM)
112 return 0; /* deny everything except custom level */
113
114 if (optval + 1 > optval_end)
115 return 0; /* bounds check */
116
117 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
118 BPF_SK_STORAGE_GET_F_CREATE);
119 if (!storage)
120 return 0; /* couldn't get sk storage */
121
122 if (!ctx->retval)
123 return 0; /* kernel should not have handled
124 * SOL_CUSTOM, something is wrong!
125 */
126 ctx->retval = 0; /* Reset system call return value to zero */
127
128 optval[0] = storage->val;
129 ctx->optlen = 1;
130
131 return 1;
132 }
133
134 SEC("cgroup/setsockopt")
_setsockopt(struct bpf_sockopt * ctx)135 int _setsockopt(struct bpf_sockopt *ctx)
136 {
137 __u8 *optval_end = ctx->optval_end;
138 __u8 *optval = ctx->optval;
139 struct sockopt_sk *storage;
140 struct bpf_sock *sk;
141
142 /* Bypass AF_NETLINK. */
143 sk = ctx->sk;
144 if (sk && sk->family == AF_NETLINK)
145 return 1;
146
147 /* Make sure bpf_get_netns_cookie is callable.
148 */
149 if (bpf_get_netns_cookie(NULL) == 0)
150 return 0;
151
152 if (bpf_get_netns_cookie(ctx) == 0)
153 return 0;
154
155 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
156 /* Not interested in SOL_IP:IP_TOS;
157 * let next BPF program in the cgroup chain or kernel
158 * handle it.
159 */
160 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
161 return 1;
162 }
163
164 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
165 /* Overwrite SO_SNDBUF value */
166
167 if (optval + sizeof(__u32) > optval_end)
168 return 0; /* bounds check */
169
170 *(__u32 *)optval = 0x55AA;
171 ctx->optlen = 4;
172
173 return 1;
174 }
175
176 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
177 /* Always use cubic */
178
179 if (optval + 5 > optval_end)
180 return 0; /* bounds check */
181
182 memcpy(optval, "cubic", 5);
183 ctx->optlen = 5;
184
185 return 1;
186 }
187
188 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
189 /* Original optlen is larger than PAGE_SIZE. */
190 if (ctx->optlen != page_size * 2)
191 return 0; /* unexpected data size */
192
193 if (optval + 1 > optval_end)
194 return 0; /* bounds check */
195
196 /* Make sure we can trim the buffer. */
197 optval[0] = 0;
198 ctx->optlen = 1;
199
200 /* Usepace buffer is PAGE_SIZE * 2, but BPF
201 * program can only see the first PAGE_SIZE
202 * bytes of data.
203 */
204 if (optval_end - optval != page_size)
205 return 0; /* unexpected data size */
206
207 return 1;
208 }
209
210 if (ctx->level != SOL_CUSTOM)
211 return 0; /* deny everything except custom level */
212
213 if (optval + 1 > optval_end)
214 return 0; /* bounds check */
215
216 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
217 BPF_SK_STORAGE_GET_F_CREATE);
218 if (!storage)
219 return 0; /* couldn't get sk storage */
220
221 storage->val = optval[0];
222 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
223 * setsockopt handler.
224 */
225
226 return 1;
227 }
228