1 // SPDX-License-Identifier: GPL-2.0
2 #include <string.h>
3 #include <linux/tcp.h>
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <bpf/bpf_helpers.h>
7
/* License string, emitted into the object's "license" section. */
char _license[] SEC("license") = "GPL";
9
/* Page size the programs compare against: the IP_FREEBIND branches check
 * it against the length of the visible optval window (and, in setsockopt,
 * page_size * 2 against optlen). Starts at 0 and is expected to be filled
 * in from userspace.
 */
int page_size = 0; /* userspace should set it */
11
/* Fall back to IPPROTO_TCP when the included headers do not provide
 * SOL_TCP.
 */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
#endif
15
/* Socket option level handled entirely by the programs in this file:
 * setsockopt stores one byte in socket storage and getsockopt returns it.
 * Levels other than this one and the explicitly special-cased options are
 * rejected.
 */
#define SOL_CUSTOM 0xdeadbeef
17
/* Value type of socket_storage_map. */
struct sockopt_sk {
	/* Byte saved by _setsockopt() and reported by _getsockopt() for
	 * the SOL_CUSTOM level.
	 */
	__u8 val;
};
21
/* BPF_MAP_TYPE_SK_STORAGE map holding a struct sockopt_sk; both programs
 * reach it through bpf_sk_storage_get() on ctx->sk with
 * BPF_SK_STORAGE_GET_F_CREATE.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct sockopt_sk);
} socket_storage_map SEC(".maps");
28
29 SEC("cgroup/getsockopt")
_getsockopt(struct bpf_sockopt * ctx)30 int _getsockopt(struct bpf_sockopt *ctx)
31 {
32 __u8 *optval_end = ctx->optval_end;
33 __u8 *optval = ctx->optval;
34 struct sockopt_sk *storage;
35
36 /* Make sure bpf_get_netns_cookie is callable.
37 */
38 if (bpf_get_netns_cookie(NULL) == 0)
39 return 0;
40
41 if (bpf_get_netns_cookie(ctx) == 0)
42 return 0;
43
44 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
45 /* Not interested in SOL_IP:IP_TOS;
46 * let next BPF program in the cgroup chain or kernel
47 * handle it.
48 */
49 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
50 return 1;
51 }
52
53 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
54 /* Not interested in SOL_SOCKET:SO_SNDBUF;
55 * let next BPF program in the cgroup chain or kernel
56 * handle it.
57 */
58 return 1;
59 }
60
61 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
62 /* Not interested in SOL_TCP:TCP_CONGESTION;
63 * let next BPF program in the cgroup chain or kernel
64 * handle it.
65 */
66 return 1;
67 }
68
69 if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
70 /* Verify that TCP_ZEROCOPY_RECEIVE triggers.
71 * It has a custom implementation for performance
72 * reasons.
73 */
74
75 if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end)
76 return 0; /* EPERM, bounds check */
77
78 if (((struct tcp_zerocopy_receive *)optval)->address != 0)
79 return 0; /* EPERM, unexpected data */
80
81 return 1;
82 }
83
84 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
85 if (optval + 1 > optval_end)
86 return 0; /* EPERM, bounds check */
87
88 ctx->retval = 0; /* Reset system call return value to zero */
89
90 /* Always export 0x55 */
91 optval[0] = 0x55;
92 ctx->optlen = 1;
93
94 /* Userspace buffer is PAGE_SIZE * 2, but BPF
95 * program can only see the first PAGE_SIZE
96 * bytes of data.
97 */
98 if (optval_end - optval != page_size)
99 return 0; /* EPERM, unexpected data size */
100
101 return 1;
102 }
103
104 if (ctx->level != SOL_CUSTOM)
105 return 0; /* EPERM, deny everything except custom level */
106
107 if (optval + 1 > optval_end)
108 return 0; /* EPERM, bounds check */
109
110 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
111 BPF_SK_STORAGE_GET_F_CREATE);
112 if (!storage)
113 return 0; /* EPERM, couldn't get sk storage */
114
115 if (!ctx->retval)
116 return 0; /* EPERM, kernel should not have handled
117 * SOL_CUSTOM, something is wrong!
118 */
119 ctx->retval = 0; /* Reset system call return value to zero */
120
121 optval[0] = storage->val;
122 ctx->optlen = 1;
123
124 return 1;
125 }
126
127 SEC("cgroup/setsockopt")
_setsockopt(struct bpf_sockopt * ctx)128 int _setsockopt(struct bpf_sockopt *ctx)
129 {
130 __u8 *optval_end = ctx->optval_end;
131 __u8 *optval = ctx->optval;
132 struct sockopt_sk *storage;
133
134 /* Make sure bpf_get_netns_cookie is callable.
135 */
136 if (bpf_get_netns_cookie(NULL) == 0)
137 return 0;
138
139 if (bpf_get_netns_cookie(ctx) == 0)
140 return 0;
141
142 if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
143 /* Not interested in SOL_IP:IP_TOS;
144 * let next BPF program in the cgroup chain or kernel
145 * handle it.
146 */
147 ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
148 return 1;
149 }
150
151 if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
152 /* Overwrite SO_SNDBUF value */
153
154 if (optval + sizeof(__u32) > optval_end)
155 return 0; /* EPERM, bounds check */
156
157 *(__u32 *)optval = 0x55AA;
158 ctx->optlen = 4;
159
160 return 1;
161 }
162
163 if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
164 /* Always use cubic */
165
166 if (optval + 5 > optval_end)
167 return 0; /* EPERM, bounds check */
168
169 memcpy(optval, "cubic", 5);
170 ctx->optlen = 5;
171
172 return 1;
173 }
174
175 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
176 /* Original optlen is larger than PAGE_SIZE. */
177 if (ctx->optlen != page_size * 2)
178 return 0; /* EPERM, unexpected data size */
179
180 if (optval + 1 > optval_end)
181 return 0; /* EPERM, bounds check */
182
183 /* Make sure we can trim the buffer. */
184 optval[0] = 0;
185 ctx->optlen = 1;
186
187 /* Usepace buffer is PAGE_SIZE * 2, but BPF
188 * program can only see the first PAGE_SIZE
189 * bytes of data.
190 */
191 if (optval_end - optval != page_size)
192 return 0; /* EPERM, unexpected data size */
193
194 return 1;
195 }
196
197 if (ctx->level != SOL_CUSTOM)
198 return 0; /* EPERM, deny everything except custom level */
199
200 if (optval + 1 > optval_end)
201 return 0; /* EPERM, bounds check */
202
203 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
204 BPF_SK_STORAGE_GET_F_CREATE);
205 if (!storage)
206 return 0; /* EPERM, couldn't get sk storage */
207
208 storage->val = optval[0];
209 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
210 * setsockopt handler.
211 */
212
213 return 1;
214 }
215