1 /*
2 * nghttp2 - HTTP/2 C Library
3 *
4 * Copyright (c) 2021 Tatsuhiro Tsujikawa
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25 #include <linux/udp.h>
26 #include <linux/bpf.h>
27
28 #include <bpf/bpf_helpers.h>
29
30 /*
31 * How to compile:
32 *
33 * clang-12 -O2 -Wall -target bpf -g -c reuseport_kern.c -o reuseport_kern.o \
34 * -I/path/to/kernel/include
35 *
 * See
 * https://www.kernel.org/doc/Documentation/kbuild/headers_install.txt
 * for how to install kernel header files.
39 */
40
41 /* AES_CBC_decrypt_buffer: https://github.com/kokke/tiny-AES-c
42 License is Public Domain. Commit hash:
43 12e7744b4919e9d55de75b7ab566326a1c8e7a67 */
44
/* AES block size in bytes.  AES only ever works on 128-bit blocks. */
#define AES_BLOCKLEN 16

/* Cipher key size in bytes. */
#define AES_KEYLEN 16

/* Size in bytes of the expanded key schedule held in struct AES_ctx. */
#define AES_keyExpSize 176

/* Decryption context: the round keys produced by KeyExpansion(). */
struct AES_ctx {
  __u8 RoundKey[AES_keyExpSize];
};
55
/* AES parameters used throughout this file. */
#define Nb 4  /* Columns in the AES state; a constant of the algorithm. */
#define Nk 4  /* 32-bit words in the cipher key. */
#define Nr 10 /* Rounds performed by the cipher. */

/* state_t - 4x4 byte array holding the intermediate results during
   decryption. */
typedef __u8 state_t[4][4];
66
/* The lookup tables are marked const so they can be placed in
   read-only storage instead of RAM.  The numbers below can be
   computed dynamically, trading ROM for RAM; this can be useful in
   (embedded) bootloader applications, where ROM is often limited.

   sbox is the forward AES substitution box, one row per high
   nibble of the index. */
static const __u8 sbox[256] = {
    /* 0     1     2     3     4     5     6     7     8     9     A     B
       C     D     E     F */
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, /* 0x00 */
    0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, /* 0x10 */
    0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, /* 0x20 */
    0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, /* 0x30 */
    0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, /* 0x40 */
    0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, /* 0x50 */
    0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, /* 0x60 */
    0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, /* 0x70 */
    0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, /* 0x80 */
    0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, /* 0x90 */
    0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, /* 0xa0 */
    0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, /* 0xb0 */
    0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, /* 0xc0 */
    0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, /* 0xd0 */
    0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, /* 0xe0 */
    0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, /* 0xf0 */
    0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
95
/* Inverse AES substitution box used by InvSubBytes(), one row per
   high nibble of the index. */
static const __u8 rsbox[256] = {
    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, /* 0x00 */
    0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, /* 0x10 */
    0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, /* 0x20 */
    0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, /* 0x30 */
    0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, /* 0x40 */
    0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, /* 0x50 */
    0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, /* 0x60 */
    0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, /* 0x70 */
    0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, /* 0x80 */
    0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, /* 0x90 */
    0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, /* 0xa0 */
    0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, /* 0xb0 */
    0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, /* 0xc0 */
    0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, /* 0xd0 */
    0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, /* 0xe0 */
    0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, /* 0xf0 */
    0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
119
/* Round constants for the key schedule.  Rcon[i] holds x^(i-1) in
   the field GF(2^8), where x is denoted {02}.  KeyExpansion() only
   reads entries 1 through 10. */
static const __u8 Rcon[11] = {
    0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
};

/* Forward S-box lookup. */
#define getSBoxValue(num) (sbox[(num)])
127
128 /* This function produces Nb(Nr+1) round keys. The round keys are used
129 in each round to decrypt the states. */
KeyExpansion(__u8 * RoundKey,const __u8 * Key)130 static void KeyExpansion(__u8 *RoundKey, const __u8 *Key) {
131 unsigned i, j, k;
132 __u8 tempa[4]; /* Used for the column/row operations */
133
134 /* The first round key is the key itself. */
135 for (i = 0; i < Nk; ++i) {
136 RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
137 RoundKey[(i * 4) + 1] = Key[(i * 4) + 1];
138 RoundKey[(i * 4) + 2] = Key[(i * 4) + 2];
139 RoundKey[(i * 4) + 3] = Key[(i * 4) + 3];
140 }
141
142 /* All other round keys are found from the previous round keys. */
143 for (i = Nk; i < Nb * (Nr + 1); ++i) {
144 {
145 k = (i - 1) * 4;
146 tempa[0] = RoundKey[k + 0];
147 tempa[1] = RoundKey[k + 1];
148 tempa[2] = RoundKey[k + 2];
149 tempa[3] = RoundKey[k + 3];
150 }
151
152 if (i % Nk == 0) {
153 /* This function shifts the 4 bytes in a word to the left once.
154 [a0,a1,a2,a3] becomes [a1,a2,a3,a0] */
155
156 /* Function RotWord() */
157 {
158 const __u8 u8tmp = tempa[0];
159 tempa[0] = tempa[1];
160 tempa[1] = tempa[2];
161 tempa[2] = tempa[3];
162 tempa[3] = u8tmp;
163 }
164
165 /* SubWord() is a function that takes a four-byte input word and
166 applies the S-box to each of the four bytes to produce an
167 output word. */
168
169 /* Function Subword() */
170 {
171 tempa[0] = getSBoxValue(tempa[0]);
172 tempa[1] = getSBoxValue(tempa[1]);
173 tempa[2] = getSBoxValue(tempa[2]);
174 tempa[3] = getSBoxValue(tempa[3]);
175 }
176
177 tempa[0] = tempa[0] ^ Rcon[i / Nk];
178 }
179 j = i * 4;
180 k = (i - Nk) * 4;
181 RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0];
182 RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1];
183 RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2];
184 RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3];
185 }
186 }
187
AES_init_ctx(struct AES_ctx * ctx,const __u8 * key)188 static void AES_init_ctx(struct AES_ctx *ctx, const __u8 *key) {
189 KeyExpansion(ctx->RoundKey, key);
190 }
191
192 /* This function adds the round key to state. The round key is added
193 to the state by an XOR function. */
AddRoundKey(__u8 round,state_t * state,const __u8 * RoundKey)194 static void AddRoundKey(__u8 round, state_t *state, const __u8 *RoundKey) {
195 __u8 i, j;
196 for (i = 0; i < 4; ++i) {
197 for (j = 0; j < 4; ++j) {
198 (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j];
199 }
200 }
201 }
202
/* Multiplies x by {02} in GF(2^8): shift left by one bit and, when
   the bit shifted out was set, reduce by XORing with 0x1b. */
static __u8 xtime(__u8 x) {
  const __u8 shifted = (__u8)(x << 1);
  const __u8 reduce = (x & 0x80) ? 0x1b : 0x00;

  return shifted ^ reduce;
}
204
/* Multiplies x by y in GF(2^8) by combining repeated xtime()
   doublings of x, one per set bit in the low five bits of y.

   Both parameters are parenthesized so that expression arguments
   (for example `a ^ b`) are evaluated as a unit.  Each argument is
   still expanded several times, so do not pass expressions with
   side effects. */
#define Multiply(x, y)                                                         \
  ((((y) & 1) * (x)) ^ (((y) >> 1 & 1) * xtime(x)) ^                           \
   (((y) >> 2 & 1) * xtime(xtime(x))) ^                                        \
   (((y) >> 3 & 1) * xtime(xtime(xtime(x)))) ^                                 \
   (((y) >> 4 & 1) * xtime(xtime(xtime(xtime(x))))))

/* Inverse S-box lookup. */
#define getSBoxInvert(num) (rsbox[(num)])
212
213 /* MixColumns function mixes the columns of the state matrix. The
214 method used to multiply may be difficult to understand for the
215 inexperienced. Please use the references to gain more
216 information. */
InvMixColumns(state_t * state)217 static void InvMixColumns(state_t *state) {
218 int i;
219 __u8 a, b, c, d;
220 for (i = 0; i < 4; ++i) {
221 a = (*state)[i][0];
222 b = (*state)[i][1];
223 c = (*state)[i][2];
224 d = (*state)[i][3];
225
226 (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^
227 Multiply(d, 0x09);
228 (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^
229 Multiply(d, 0x0d);
230 (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^
231 Multiply(d, 0x0b);
232 (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^
233 Multiply(d, 0x0e);
234 }
235 }
236
237 extern __u32 LINUX_KERNEL_VERSION __kconfig;
238
239 /* The SubBytes Function Substitutes the values in the state matrix
240 with values in an S-box. */
InvSubBytes(state_t * state)241 static void InvSubBytes(state_t *state) {
242 __u8 i, j;
243 if (LINUX_KERNEL_VERSION < KERNEL_VERSION(5, 10, 0)) {
244 for (i = 0; i < 4; ++i) {
245 for (j = 0; j < 4; ++j) {
246 /* Ubuntu 20.04 LTS kernel 5.4.0 needs this workaround
247 otherwise "math between map_value pointer and register with
248 unbounded min value is not allowed". 5.10.0 is a kernel
249 version that works but it might not be the minimum
250 version. */
251 __u8 k = (*state)[j][i];
252 (*state)[j][i] = k ? getSBoxInvert(k) : getSBoxInvert(0);
253 }
254 }
255 } else {
256 for (i = 0; i < 4; ++i) {
257 for (j = 0; j < 4; ++j) {
258 (*state)[j][i] = getSBoxInvert((*state)[j][i]);
259 }
260 }
261 }
262 }
263
/* Inverse ShiftRows step.  Row r of the state is made up of the
   bytes (*state)[0..3][r]; rows 1, 2 and 3 are rotated to the right
   by 1, 2 and 3 positions respectively.  Row 0 is left untouched. */
static void InvShiftRows(state_t *state) {
  __u8(*s)[4] = *state;
  __u8 c0, c1, c2, c3;

  /* Row 1: rotate right by one. */
  c0 = s[0][1];
  c1 = s[1][1];
  c2 = s[2][1];
  c3 = s[3][1];
  s[0][1] = c3;
  s[1][1] = c0;
  s[2][1] = c1;
  s[3][1] = c2;

  /* Row 2: rotate right by two, i.e. swap opposite positions. */
  c0 = s[0][2];
  c1 = s[1][2];
  c2 = s[2][2];
  c3 = s[3][2];
  s[0][2] = c2;
  s[1][2] = c3;
  s[2][2] = c0;
  s[3][2] = c1;

  /* Row 3: rotate right by three. */
  c0 = s[0][3];
  c1 = s[1][3];
  c2 = s[2][3];
  c3 = s[3][3];
  s[0][3] = c1;
  s[1][3] = c2;
  s[2][3] = c3;
  s[3][3] = c0;
}
290
/* Decrypts one 16-byte block held in state, in place, using the
   expanded key schedule RoundKey.  The rounds are written out one by
   one rather than in a loop. */
static void InvCipher(state_t *state, const __u8 *RoundKey) {
/* One full inverse round using round key number rk. */
#define INV_ROUND(rk)                                                          \
  do {                                                                         \
    InvShiftRows(state);                                                       \
    InvSubBytes(state);                                                        \
    AddRoundKey((rk), state, RoundKey);                                        \
    InvMixColumns(state);                                                      \
  } while (0)

  /* The last round key is applied before the rounds start. */
  AddRoundKey(Nr, state, RoundKey);

  /* Nr rounds in total; the first Nr - 1 are identical. */
  INV_ROUND(Nr - 1);
  INV_ROUND(Nr - 2);
  INV_ROUND(Nr - 3);
  INV_ROUND(Nr - 4);
  INV_ROUND(Nr - 5);
  INV_ROUND(Nr - 6);
  INV_ROUND(Nr - 7);
  INV_ROUND(Nr - 8);
  INV_ROUND(Nr - 9);

  /* The final round omits InvMixColumns(). */
  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 10, state, RoundKey);

#undef INV_ROUND
}
348
AES_ECB_decrypt(const struct AES_ctx * ctx,__u8 * buf)349 static void AES_ECB_decrypt(const struct AES_ctx *ctx, __u8 *buf) {
350 /* The next function call decrypts the PlainText with the Key using
351 AES algorithm. */
352 InvCipher((state_t *)buf, ctx->RoundKey);
353 }
354
/* rol32: taken from the Linux kernel source code */

/**
 * rol32 - rotate a 32-bit value left
 * @word: value to rotate
 * @shift: number of bit positions to rotate by
 *
 * Returns @word rotated left by @shift bits.
 */
static inline __u32 rol32(__u32 word, unsigned int shift) {
  /* Complementary right-shift amount, kept within 0..31. */
  const unsigned int back = (-shift) & 31;

  return (word << shift) | (word >> back);
}
365
366 /* jhash.h: Jenkins hash support.
367 *
368 * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
369 *
370 * https://burtleburtle.net/bob/hash/
371 *
372 * These are the credits from Bob's sources:
373 *
374 * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
375 *
376 * These are functions for producing 32-bit hashes for hash table lookup.
377 * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
378 * are externally useful functions. Routines to test the hash are included
379 * if SELF_TEST is defined. You can use this free for any purpose. It's in
380 * the public domain. It has no warranty.
381 *
382 * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
383 *
384 * I've modified Bob's hash to be useful in the Linux kernel, and
385 * any bugs present are my fault.
386 * Jozsef
387 */
388
/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c.

   a, b and c must be modifiable lvalues; all three are updated in
   place.  The body is wrapped in do { } while (0) so that the macro
   followed by a semicolon forms exactly one statement and can be
   used safely inside an unbraced if/else. */
#define __jhash_final(a, b, c)                                                 \
  do {                                                                         \
    c ^= b;                                                                    \
    c -= rol32(b, 14);                                                         \
    a ^= c;                                                                    \
    a -= rol32(c, 11);                                                         \
    b ^= a;                                                                    \
    b -= rol32(a, 25);                                                         \
    c ^= b;                                                                    \
    c -= rol32(b, 16);                                                         \
    a ^= c;                                                                    \
    a -= rol32(c, 4);                                                          \
    b ^= a;                                                                    \
    b -= rol32(a, 14);                                                         \
    c ^= b;                                                                    \
    c -= rol32(b, 24);                                                         \
  } while (0)
407
/* __jhash_nwords - hash exactly 3, 2 or 1 word(s).

   initval is added to each of a, b and c, the three sums are run
   through __jhash_final(), and the mixed third value is returned. */
static inline __u32 __jhash_nwords(__u32 a, __u32 b, __u32 c, __u32 initval) {
  __u32 x = a + initval;
  __u32 y = b + initval;
  __u32 z = c + initval;

  __jhash_final(x, y, z);

  return z;
}
418
/* An arbitrary initial parameter */
#define JHASH_INITVAL 0xdeadbeef

/* Hashes the two words a and b, seeded with initval.  The third
   word passed to __jhash_nwords() is zero. */
static inline __u32 jhash_2words(__u32 a, __u32 b, __u32 initval) {
  const __u32 seed = initval + JHASH_INITVAL + (2 << 2);

  return __jhash_nwords(a, b, 0, seed);
}
425
426 struct {
427 __uint(type, BPF_MAP_TYPE_HASH);
428 __uint(max_entries, 255);
429 __type(key, __u64);
430 __type(value, __u32);
431 } cid_prefix_map SEC(".maps");
432
433 struct {
434 __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
435 __uint(max_entries, 255);
436 __type(key, __u32);
437 __type(value, __u32);
438 } reuseport_array SEC(".maps");
439
440 struct {
441 __uint(type, BPF_MAP_TYPE_ARRAY);
442 __uint(max_entries, 3);
443 __type(key, __u32);
444 __type(value, __u64);
445 } sk_info SEC(".maps");
446
/* Fields extracted from a QUIC packet header by parse_quic(). */
typedef struct quic_hd {
  /* Points at the first byte of the Destination Connection ID inside
     the buffer that was parsed. */
  __u8 *dcid;
  /* Length of the DCID in bytes. */
  __u32 dcidlen;
  /* Offset of the DCID from the start of the QUIC packet: 6 for long
     header packets, 1 for short header packets. */
  __u32 dcid_offset;
  /* Packet type; one of the NGTCP2_PKT_* values for the types this
     program routes. */
  __u8 type;
} quic_hd;
453
/* DCID length assumed for short header packets, and the only length
   for which a CID prefix lookup is attempted. */
#define SV_DCIDLEN 20
/* Largest and smallest DCID lengths accepted in a long header. */
#define MAX_DCIDLEN 20
#define MIN_DCIDLEN 8
/* Length in bytes of the CID prefix. */
#define CID_PREFIXLEN 8
/* Offset of the CID prefix from the start of the DCID. */
#define CID_PREFIX_OFFSET 1

/* Packet types stored in quic_hd.type.  The long header values are
   the two type bits taken from the first byte; NGTCP2_PKT_SHORT
   marks a short header packet. */
enum {
  NGTCP2_PKT_INITIAL = 0x0,
  NGTCP2_PKT_0RTT = 0x1,
  NGTCP2_PKT_HANDSHAKE = 0x2,
  NGTCP2_PKT_SHORT = 0x40,
};
466
/* Parses the start of a QUIC packet and fills in qhd.

   data points at the first byte of the QUIC packet and data_end one
   past the last byte available.  qhd->dcid is set to point into that
   same buffer; nothing is copied.

   Long header (top bit of the first byte set): the DCID length is
   read from byte 5 and must lie within [MIN_DCIDLEN, MAX_DCIDLEN];
   the packet type is taken from bits 0x30 of the first byte.

   Short header: the DCID is assumed to be SV_DCIDLEN bytes long and
   to start at byte 1.

   Returns 0 on success, or -1 if the buffer is too short for the
   fixed part of the header or the DCID length is out of range. */
static inline int parse_quic(quic_hd *qhd, __u8 *data, __u8 *data_end) {
  __u64 dcidlen;

  /* The first byte decides the header form. */
  if (data + 1 > data_end) {
    return -1;
  }

  if (*data & 0x80) {
    /* First byte (1) + version (4) + DCID length (1). */
    if (data + 6 > data_end) {
      return -1;
    }

    /* Do not check the actual DCID length against the buffer because
       we might not buffer the entire DCID here. */
    dcidlen = data[5];

    if (dcidlen > MAX_DCIDLEN || dcidlen < MIN_DCIDLEN) {
      return -1;
    }

    qhd->type = (*data & 0x30) >> 4;
    qhd->dcid = data + 6;
    qhd->dcidlen = dcidlen;
    qhd->dcid_offset = 6;

    return 0;
  }

  /* Callers read SV_DCIDLEN bytes starting at byte 1, so make sure
     they are present. */
  if (data + 1 + SV_DCIDLEN > data_end) {
    return -1;
  }

  qhd->type = NGTCP2_PKT_SHORT;
  qhd->dcid = data + 1;
  qhd->dcidlen = SV_DCIDLEN;
  qhd->dcid_offset = 1;

  return 0;
}
497
/* Hashes the first 8 bytes of data, seeded with initval.

   The bytes are assembled into two big-endian 32-bit words and
   passed to jhash_2words().  datalen is currently ignored: exactly
   8 bytes are always read, so data must point to at least that
   many. */
static __u32 hash(const __u8 *data, __u32 datalen, __u32 initval) {
  __u32 a, b;

  (void)datalen;

  /* Widen each byte to __u32 before shifting.  Shifting the promoted
     (signed) int left by 24 is undefined when the byte is >= 0x80. */
  a = ((__u32)data[0] << 24) | ((__u32)data[1] << 16) |
      ((__u32)data[2] << 8) | (__u32)data[3];
  b = ((__u32)data[4] << 24) | ((__u32)data[5] << 16) |
      ((__u32)data[6] << 8) | (__u32)data[7];

  return jhash_2words(a, b, initval);
}
506
sk_index_from_dcid(const quic_hd * qhd,const struct sk_reuseport_md * reuse_md,__u64 num_socks)507 static __u32 sk_index_from_dcid(const quic_hd *qhd,
508 const struct sk_reuseport_md *reuse_md,
509 __u64 num_socks) {
510 __u32 len = qhd->dcidlen;
511 __u32 h = reuse_md->hash;
512 __u8 hbuf[8];
513
514 if (len > 16) {
515 __builtin_memset(hbuf, 0, sizeof(hbuf));
516
517 switch (len) {
518 case 20:
519 __builtin_memcpy(hbuf, qhd->dcid + 16, 4);
520 break;
521 case 19:
522 __builtin_memcpy(hbuf, qhd->dcid + 16, 3);
523 break;
524 case 18:
525 __builtin_memcpy(hbuf, qhd->dcid + 16, 2);
526 break;
527 case 17:
528 __builtin_memcpy(hbuf, qhd->dcid + 16, 1);
529 break;
530 }
531
532 h = hash(hbuf, sizeof(hbuf), h);
533 len = 16;
534 }
535
536 if (len > 8) {
537 __builtin_memset(hbuf, 0, sizeof(hbuf));
538
539 switch (len) {
540 case 16:
541 __builtin_memcpy(hbuf, qhd->dcid + 8, 8);
542 break;
543 case 15:
544 __builtin_memcpy(hbuf, qhd->dcid + 8, 7);
545 break;
546 case 14:
547 __builtin_memcpy(hbuf, qhd->dcid + 8, 6);
548 break;
549 case 13:
550 __builtin_memcpy(hbuf, qhd->dcid + 8, 5);
551 break;
552 case 12:
553 __builtin_memcpy(hbuf, qhd->dcid + 8, 4);
554 break;
555 case 11:
556 __builtin_memcpy(hbuf, qhd->dcid + 8, 3);
557 break;
558 case 10:
559 __builtin_memcpy(hbuf, qhd->dcid + 8, 2);
560 break;
561 case 9:
562 __builtin_memcpy(hbuf, qhd->dcid + 8, 1);
563 break;
564 }
565
566 h = hash(hbuf, sizeof(hbuf), h);
567 len = 8;
568 }
569
570 return hash(qhd->dcid, len, h) % num_socks;
571 }
572
573 SEC("sk_reuseport")
select_reuseport(struct sk_reuseport_md * reuse_md)574 int select_reuseport(struct sk_reuseport_md *reuse_md) {
575 __u32 sk_index, *psk_index;
576 __u64 *pnum_socks, *pkey;
577 __u32 zero = 0, key_high_idx = 1, key_low_idx = 2;
578 int rv;
579 quic_hd qhd;
580 __u8 qpktbuf[6 + MAX_DCIDLEN];
581 struct AES_ctx aes_ctx;
582 __u8 key[AES_KEYLEN];
583 __u8 *cid_prefix;
584
585 if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr), qpktbuf,
586 sizeof(qpktbuf)) != 0) {
587 return SK_DROP;
588 }
589
590 pnum_socks = bpf_map_lookup_elem(&sk_info, &zero);
591 if (pnum_socks == NULL) {
592 return SK_DROP;
593 }
594
595 pkey = bpf_map_lookup_elem(&sk_info, &key_high_idx);
596 if (pkey == NULL) {
597 return SK_DROP;
598 }
599
600 __builtin_memcpy(key, pkey, sizeof(*pkey));
601
602 pkey = bpf_map_lookup_elem(&sk_info, &key_low_idx);
603 if (pkey == NULL) {
604 return SK_DROP;
605 }
606
607 __builtin_memcpy(key + sizeof(*pkey), pkey, sizeof(*pkey));
608
609 rv = parse_quic(&qhd, qpktbuf, qpktbuf + sizeof(qpktbuf));
610 if (rv != 0) {
611 return SK_DROP;
612 }
613
614 AES_init_ctx(&aes_ctx, key);
615
616 switch (qhd.type) {
617 case NGTCP2_PKT_INITIAL:
618 case NGTCP2_PKT_0RTT:
619 if (qhd.dcidlen == SV_DCIDLEN) {
620 cid_prefix = qhd.dcid + CID_PREFIX_OFFSET;
621 AES_ECB_decrypt(&aes_ctx, cid_prefix);
622
623 psk_index = bpf_map_lookup_elem(&cid_prefix_map, cid_prefix);
624 if (psk_index != NULL) {
625 sk_index = *psk_index;
626
627 break;
628 }
629 }
630
631 sk_index = sk_index_from_dcid(&qhd, reuse_md, *pnum_socks);
632
633 break;
634 case NGTCP2_PKT_HANDSHAKE:
635 case NGTCP2_PKT_SHORT:
636 if (qhd.dcidlen != SV_DCIDLEN) {
637 return SK_DROP;
638 }
639
640 cid_prefix = qhd.dcid + CID_PREFIX_OFFSET;
641 AES_ECB_decrypt(&aes_ctx, cid_prefix);
642
643 psk_index = bpf_map_lookup_elem(&cid_prefix_map, cid_prefix);
644 if (psk_index == NULL) {
645 sk_index = sk_index_from_dcid(&qhd, reuse_md, *pnum_socks);
646
647 break;
648 }
649
650 sk_index = *psk_index;
651
652 break;
653 default:
654 return SK_DROP;
655 }
656
657 rv = bpf_sk_select_reuseport(reuse_md, &reuseport_array, &sk_index, 0);
658 if (rv != 0) {
659 return SK_DROP;
660 }
661
662 return SK_PASS;
663 }
664