• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * nghttp2 - HTTP/2 C Library
3  *
4  * Copyright (c) 2021 Tatsuhiro Tsujikawa
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining
7  * a copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sublicense, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 #include <linux/udp.h>
26 #include <linux/bpf.h>
27 
28 #include <bpf/bpf_helpers.h>
29 
30 /*
31  * How to compile:
32  *
33  * clang-12 -O2 -Wall -target bpf -g -c reuseport_kern.c -o reuseport_kern.o \
34  *   -I/path/to/kernel/include
35  *
36  * See
37  * https://www.kernel.org/doc/Documentation/kbuild/headers_install.txt
38  * how to install kernel header files.
39  */
40 
41 /* AES_CBC_decrypt_buffer: https://github.com/kokke/tiny-AES-c
42    License is Public Domain.  Commit hash:
43    12e7744b4919e9d55de75b7ab566326a1c8e7a67 */
44 
/* Block length in bytes - AES only operates on 128-bit blocks. */
#define AES_BLOCKLEN 16

/* Key length in bytes. */
#define AES_KEYLEN 16
/* Size in bytes of the expanded key schedule filled in by
   KeyExpansion(): Nb * (Nr + 1) words of 4 bytes each. */
#define AES_keyExpSize 176
51 
/* Decryption context.  RoundKey holds the expanded key schedule that
   AES_init_ctx() derives from the raw key. */
struct AES_ctx {
  __u8 RoundKey[AES_keyExpSize];
};
55 
/* The number of columns comprising a state in AES.  This is a
   constant in AES.  Value=4 */
#define Nb 4

#define Nk 4  /* The number of 32 bit words in a key. */
#define Nr 10 /* The number of rounds in AES Cipher. */

/* state - 4x4 byte array holding the intermediate results during
   decryption.  A 16-byte buffer is reinterpreted as this type, so
   element [c][r] is byte (4 * c + r) of the buffer. */
typedef __u8 state_t[4][4];
66 
/* Forward AES S-box.  The lookup tables are marked const so they can
   be placed in read-only storage instead of RAM.  KeyExpansion() uses
   this table (through getSBoxValue) for its SubWord step.  One row
   per high nibble of the index, one column per low nibble. */
static const __u8 sbox[256] = {
    /* 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F */
    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
95 
/* Inverse AES S-box, read by InvSubBytes() through getSBoxInvert.
   One row per high nibble of the index, one column per low nibble. */
static const __u8 rsbox[256] = {
    /* 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F */
    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d};
119 
/* Round constant table.  For i >= 1, Rcon[i] is x^(i-1) in GF(2^8),
   where x is the element {02}.  KeyExpansion() only ever reads
   entries 1..10; entry 0 is a placeholder that keeps the indexing
   1-based. */
static const __u8 Rcon[11] = {
    0x8d,                         /* [0] unused */
    0x01, 0x02, 0x04, 0x08, 0x10, /* [1..5] */
    0x20, 0x40, 0x80, 0x1b, 0x36  /* [6..10] */
};
125 
/* Forward S-box lookup: expands to sbox[num]. */
#define getSBoxValue(num) (sbox[(num)])
127 
128 /* This function produces Nb(Nr+1) round keys. The round keys are used
129    in each round to decrypt the states. */
KeyExpansion(__u8 * RoundKey,const __u8 * Key)130 static void KeyExpansion(__u8 *RoundKey, const __u8 *Key) {
131   unsigned i, j, k;
132   __u8 tempa[4]; /* Used for the column/row operations */
133 
134   /* The first round key is the key itself. */
135   for (i = 0; i < Nk; ++i) {
136     RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
137     RoundKey[(i * 4) + 1] = Key[(i * 4) + 1];
138     RoundKey[(i * 4) + 2] = Key[(i * 4) + 2];
139     RoundKey[(i * 4) + 3] = Key[(i * 4) + 3];
140   }
141 
142   /* All other round keys are found from the previous round keys. */
143   for (i = Nk; i < Nb * (Nr + 1); ++i) {
144     {
145       k = (i - 1) * 4;
146       tempa[0] = RoundKey[k + 0];
147       tempa[1] = RoundKey[k + 1];
148       tempa[2] = RoundKey[k + 2];
149       tempa[3] = RoundKey[k + 3];
150     }
151 
152     if (i % Nk == 0) {
153       /* This function shifts the 4 bytes in a word to the left once.
154          [a0,a1,a2,a3] becomes [a1,a2,a3,a0] */
155 
156       /* Function RotWord() */
157       {
158         const __u8 u8tmp = tempa[0];
159         tempa[0] = tempa[1];
160         tempa[1] = tempa[2];
161         tempa[2] = tempa[3];
162         tempa[3] = u8tmp;
163       }
164 
165       /* SubWord() is a function that takes a four-byte input word and
166          applies the S-box to each of the four bytes to produce an
167          output word. */
168 
169       /* Function Subword() */
170       {
171         tempa[0] = getSBoxValue(tempa[0]);
172         tempa[1] = getSBoxValue(tempa[1]);
173         tempa[2] = getSBoxValue(tempa[2]);
174         tempa[3] = getSBoxValue(tempa[3]);
175       }
176 
177       tempa[0] = tempa[0] ^ Rcon[i / Nk];
178     }
179     j = i * 4;
180     k = (i - Nk) * 4;
181     RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0];
182     RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1];
183     RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2];
184     RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3];
185   }
186 }
187 
AES_init_ctx(struct AES_ctx * ctx,const __u8 * key)188 static void AES_init_ctx(struct AES_ctx *ctx, const __u8 *key) {
189   KeyExpansion(ctx->RoundKey, key);
190 }
191 
192 /* This function adds the round key to state.  The round key is added
193    to the state by an XOR function. */
AddRoundKey(__u8 round,state_t * state,const __u8 * RoundKey)194 static void AddRoundKey(__u8 round, state_t *state, const __u8 *RoundKey) {
195   __u8 i, j;
196   for (i = 0; i < 4; ++i) {
197     for (j = 0; j < 4; ++j) {
198       (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j];
199     }
200   }
201 }
202 
/* Multiplies x by {02} in GF(2^8): shift left by one bit and, when
   the top bit was set, reduce by XORing with 0x1b. */
static __u8 xtime(__u8 x) {
  const __u8 doubled = (__u8)(x << 1);
  const __u8 reduction = (x & 0x80) ? 0x1b : 0x00;

  return doubled ^ reduction;
}
204 
/* Multiplies x by y in GF(2^8), considering only the low five bits of
   y.  For each set bit k of y the term xtime^k(x) is XORed into the
   result.  Both parameters are fully parenthesized so that compound
   expressions can be passed safely; x is evaluated several times, so
   it must not have side effects. */
#define Multiply(x, y)                                                         \
  ((((y) & 1) * (x)) ^ ((((y) >> 1) & 1) * xtime(x)) ^                         \
   ((((y) >> 2) & 1) * xtime(xtime(x))) ^                                      \
   ((((y) >> 3) & 1) * xtime(xtime(xtime(x)))) ^                               \
   ((((y) >> 4) & 1) * xtime(xtime(xtime(xtime(x))))))
210 
/* Inverse S-box lookup: expands to rsbox[num]. */
#define getSBoxInvert(num) (rsbox[(num)])
212 
213 /* MixColumns function mixes the columns of the state matrix.  The
214    method used to multiply may be difficult to understand for the
215    inexperienced. Please use the references to gain more
216    information. */
InvMixColumns(state_t * state)217 static void InvMixColumns(state_t *state) {
218   int i;
219   __u8 a, b, c, d;
220   for (i = 0; i < 4; ++i) {
221     a = (*state)[i][0];
222     b = (*state)[i][1];
223     c = (*state)[i][2];
224     d = (*state)[i][3];
225 
226     (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^
227                      Multiply(d, 0x09);
228     (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^
229                      Multiply(d, 0x0d);
230     (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^
231                      Multiply(d, 0x0b);
232     (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^
233                      Multiply(d, 0x0e);
234   }
235 }
236 
237 extern __u32 LINUX_KERNEL_VERSION __kconfig;
238 
239 /* The SubBytes Function Substitutes the values in the state matrix
240    with values in an S-box. */
InvSubBytes(state_t * state)241 static void InvSubBytes(state_t *state) {
242   __u8 i, j;
243   if (LINUX_KERNEL_VERSION < KERNEL_VERSION(5, 10, 0)) {
244     for (i = 0; i < 4; ++i) {
245       for (j = 0; j < 4; ++j) {
246         /* Ubuntu 20.04 LTS kernel 5.4.0 needs this workaround
247            otherwise "math between map_value pointer and register with
248            unbounded min value is not allowed".  5.10.0 is a kernel
249            version that works but it might not be the minimum
250            version.  */
251         __u8 k = (*state)[j][i];
252         (*state)[j][i] = k ? getSBoxInvert(k) : getSBoxInvert(0);
253       }
254     }
255   } else {
256     for (i = 0; i < 4; ++i) {
257       for (j = 0; j < 4; ++j) {
258         (*state)[j][i] = getSBoxInvert((*state)[j][i]);
259       }
260     }
261   }
262 }
263 
/* Inverse ShiftRows step.  The state is indexed [column][row]; row 0
   is left alone and rows 1, 2 and 3 are rotated to the right by 1, 2
   and 3 columns respectively. */
static void InvShiftRows(state_t *state) {
  __u8(*s)[4] = *state;
  __u8 saved;

  /* Row 1: rotate one column to the right. */
  saved = s[3][1];
  s[3][1] = s[2][1];
  s[2][1] = s[1][1];
  s[1][1] = s[0][1];
  s[0][1] = saved;

  /* Row 2: rotate two columns, i.e. swap columns 0<->2 and 1<->3. */
  saved = s[0][2];
  s[0][2] = s[2][2];
  s[2][2] = saved;

  saved = s[1][2];
  s[1][2] = s[3][2];
  s[3][2] = saved;

  /* Row 3: rotate three columns to the right (one to the left). */
  saved = s[0][3];
  s[0][3] = s[1][3];
  s[1][3] = s[2][3];
  s[2][3] = s[3][3];
  s[3][3] = saved;
}
290 
/* Decrypts one 16-byte block in place.  state is the block to
   decrypt and RoundKey is the expanded key schedule produced by
   KeyExpansion(); round keys are applied in reverse order, from Nr
   down to 0. */
static void InvCipher(state_t *state, const __u8 *RoundKey) {
  /* Add the last round key (round Nr) to the state before starting
     the rounds. */
  AddRoundKey(Nr, state, RoundKey);

  /* There are Nr rounds.  The first Nr-1 rounds are identical and end
     with InvMixColumns(); the last one omits it.  The rounds are
     written out one by one instead of in a loop (NOTE(review):
     presumably to keep the generated code simple for the BPF
     verifier - confirm before converting this to a loop). */
  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 1, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 2, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 3, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 4, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 5, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 6, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 7, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 8, state, RoundKey);
  InvMixColumns(state);

  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 9, state, RoundKey);
  InvMixColumns(state);

  /* Final round: no InvMixColumns(), and round key 0 is applied. */
  InvShiftRows(state);
  InvSubBytes(state);
  AddRoundKey(Nr - 10, state, RoundKey);
}
348 
AES_ECB_decrypt(const struct AES_ctx * ctx,__u8 * buf)349 static void AES_ECB_decrypt(const struct AES_ctx *ctx, __u8 *buf) {
350   /* The next function call decrypts the PlainText with the Key using
351      AES algorithm. */
352   InvCipher((state_t *)buf, ctx->RoundKey);
353 }
354 
355 /* rol32: From linux kernel source code */
356 
/**
 * rol32 - rotate a 32-bit value left
 * @word: value to rotate
 * @shift: bits to roll
 *
 * Both shift amounts are reduced modulo 32, so a @shift of 0, 32 or
 * more is well defined (shifting a 32-bit value by its full width is
 * undefined behaviour in C).
 */
static inline __u32 rol32(__u32 word, unsigned int shift) {
  return (word << (shift & 31)) | (word >> ((-shift) & 31));
}
365 
366 /* jhash.h: Jenkins hash support.
367  *
368  * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
369  *
370  * https://burtleburtle.net/bob/hash/
371  *
372  * These are the credits from Bob's sources:
373  *
374  * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
375  *
376  * These are functions for producing 32-bit hashes for hash table lookup.
377  * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
378  * are externally useful functions.  Routines to test the hash are included
379  * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
380  * the public domain.  It has no warranty.
381  *
382  * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
383  *
384  * I've modified Bob's hash to be useful in the Linux kernel, and
385  * any bugs present are my fault.
386  * Jozsef
387  */
388 
/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c.
   All three arguments must be modifiable lvalues: each is updated in
   place.  Wrapped in do { } while (0) so the macro acts as a single
   statement, including inside an unbraced if/else. */
#define __jhash_final(a, b, c)                                                 \
  do {                                                                         \
    (c) ^= (b);                                                                \
    (c) -= rol32((b), 14);                                                     \
    (a) ^= (c);                                                                \
    (a) -= rol32((c), 11);                                                     \
    (b) ^= (a);                                                                \
    (b) -= rol32((a), 25);                                                     \
    (c) ^= (b);                                                                \
    (c) -= rol32((b), 16);                                                     \
    (a) ^= (c);                                                                \
    (a) -= rol32((c), 4);                                                      \
    (b) ^= (a);                                                                \
    (b) -= rol32((a), 14);                                                     \
    (c) ^= (b);                                                                \
    (c) -= rol32((b), 24);                                                     \
  } while (0)
407 
/* __jhash_nwords - hash exactly 3, 2 or 1 word(s).  initval is added
   to each of the three words before the final mix; the mixed third
   word is the hash. */
static inline __u32 __jhash_nwords(__u32 a, __u32 b, __u32 c, __u32 initval) {
  __u32 x = a + initval;
  __u32 y = b + initval;
  __u32 z = c + initval;

  __jhash_final(x, y, z);

  return z;
}
418 
/* An arbitrary initial parameter */
#define JHASH_INITVAL 0xdeadbeef

/* Hashes the two words a and b.  The third word is fixed to 0 and
   the seed combines initval, JHASH_INITVAL and the input size in
   bytes (2 words << 2). */
static inline __u32 jhash_2words(__u32 a, __u32 b, __u32 initval) {
  const __u32 seed = initval + JHASH_INITVAL + (2 << 2);

  return __jhash_nwords(a, b, 0, seed);
}
425 
/* Hash map from an 8-byte (__u64) key to a __u32 value.
   select_reuseport() looks it up with the first bytes of the
   decrypted connection-ID block and uses the value as an index into
   reuseport_array.  Nothing in this file writes to it (NOTE(review):
   presumably populated from user space - confirm). */
struct {
  __uint(type, BPF_MAP_TYPE_HASH);
  __uint(max_entries, 255);
  __type(key, __u64);
  __type(value, __u32);
} cid_prefix_map SEC(".maps");
432 
/* Socket array handed to bpf_sk_select_reuseport() by
   select_reuseport(); the socket index computed there is the key. */
struct {
  __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
  __uint(max_entries, 255);
  __type(key, __u32);
  __type(value, __u32);
} reuseport_array SEC(".maps");
439 
/* Three 64-bit configuration slots read by select_reuseport():
     [0] number of sockets, used as the modulus when hashing a DCID
     [1] first 8 bytes of the AES key
     [2] last 8 bytes of the AES key */
struct {
  __uint(type, BPF_MAP_TYPE_ARRAY);
  __uint(max_entries, 3);
  __type(key, __u32);
  __type(value, __u64);
} sk_info SEC(".maps");
446 
/* Result of parse_quic(): where the Destination Connection ID (DCID)
   sits in the parsed buffer and which kind of packet carries it. */
typedef struct quic_hd {
  __u8 *dcid;        /* Points at the first DCID byte inside the buffer */
  __u32 dcidlen;     /* DCID length in bytes */
  __u32 dcid_offset; /* DCID offset from the packet start: 6 for long
                        header packets, 1 for short header packets */
  __u8 type;         /* Long header packet type bits, or
                        NGTCP2_PKT_SHORT for short header packets */
} quic_hd;
453 
/* DCID length assumed for short header packets; it is also the only
   length for which the encrypted-prefix lookup is attempted. */
#define SV_DCIDLEN 20
/* Inclusive bounds on the DCID length accepted from a long header. */
#define MAX_DCIDLEN 20
#define MIN_DCIDLEN 8
/* Not referenced in this file.  NOTE(review): presumably the length
   of the prefix used as the cid_prefix_map key (8 bytes) - confirm. */
#define CID_PREFIXLEN 8
/* Offset inside the DCID at which the AES-encrypted block starts. */
#define CID_PREFIX_OFFSET 1
459 
/* Packet types stored in quic_hd.type.  For long header packets the
   value is bits 4-5 of the first byte; NGTCP2_PKT_SHORT is the value
   parse_quic() assigns to short header packets. */
enum {
  NGTCP2_PKT_INITIAL = 0x0,
  NGTCP2_PKT_0RTT = 0x1,
  NGTCP2_PKT_HANDSHAKE = 0x2,
  NGTCP2_PKT_SHORT = 0x40,
};
466 
/* Parses the start of a QUIC packet and fills in qhd.
 *
 * data points at the first byte of the QUIC packet and data_end one
 * past the last buffered byte.  Only the fixed header bytes that this
 * function reads itself are bounds-checked against data_end; the DCID
 * bytes are not touched here.
 *
 * Long header (top bit of the first byte set): the DCID length byte
 * follows the first byte and the 4-byte Version field, and must lie
 * in [MIN_DCIDLEN, MAX_DCIDLEN].  Short header: the DCID is assumed
 * to be SV_DCIDLEN bytes long and to start right after the first
 * byte.
 *
 * Returns 0 on success, or -1 if the buffer is too short or the DCID
 * length is out of range.
 */
static inline int parse_quic(quic_hd *qhd, __u8 *data, __u8 *data_end) {
  __u8 *p;
  __u64 dcidlen;

  /* The first byte is needed to tell the header form. */
  if (data >= data_end) {
    return -1;
  }

  if (*data & 0x80) {
    /* First byte + Version (4 bytes) + DCID length (1 byte). */
    if (data + 6 > data_end) {
      return -1;
    }

    p = data + 1 + 4;

    /* Only the advertised length is validated; whether that many DCID
       bytes are really buffered is not checked here. */
    dcidlen = *p;

    if (dcidlen > MAX_DCIDLEN || dcidlen < MIN_DCIDLEN) {
      return -1;
    }

    ++p;

    qhd->type = (*data & 0x30) >> 4;
    qhd->dcid = p;
    qhd->dcidlen = dcidlen;
    qhd->dcid_offset = 6;
  } else {
    qhd->type = NGTCP2_PKT_SHORT;
    qhd->dcid = data + 1;
    qhd->dcidlen = SV_DCIDLEN;
    qhd->dcid_offset = 1;
  }

  return 0;
}
497 
/* Hashes the first 8 bytes of data, read as two big-endian 32-bit
   words, with jhash_2words() seeded by initval.  datalen is accepted
   for interface compatibility but not consulted: callers must provide
   at least 8 readable bytes. */
static __u32 hash(const __u8 *data, __u32 datalen, __u32 initval) {
  __u32 a, b;

  (void)datalen;

  /* Widen each byte to __u32 before shifting: a promoted (signed) int
     shifted left by 24 would overflow into the sign bit for bytes
     >= 0x80. */
  a = ((__u32)data[0] << 24) | ((__u32)data[1] << 16) |
      ((__u32)data[2] << 8) | (__u32)data[3];
  b = ((__u32)data[4] << 24) | ((__u32)data[5] << 16) |
      ((__u32)data[6] << 8) | (__u32)data[7];

  return jhash_2words(a, b, initval);
}
506 
sk_index_from_dcid(const quic_hd * qhd,const struct sk_reuseport_md * reuse_md,__u64 num_socks)507 static __u32 sk_index_from_dcid(const quic_hd *qhd,
508                                 const struct sk_reuseport_md *reuse_md,
509                                 __u64 num_socks) {
510   __u32 len = qhd->dcidlen;
511   __u32 h = reuse_md->hash;
512   __u8 hbuf[8];
513 
514   if (len > 16) {
515     __builtin_memset(hbuf, 0, sizeof(hbuf));
516 
517     switch (len) {
518     case 20:
519       __builtin_memcpy(hbuf, qhd->dcid + 16, 4);
520       break;
521     case 19:
522       __builtin_memcpy(hbuf, qhd->dcid + 16, 3);
523       break;
524     case 18:
525       __builtin_memcpy(hbuf, qhd->dcid + 16, 2);
526       break;
527     case 17:
528       __builtin_memcpy(hbuf, qhd->dcid + 16, 1);
529       break;
530     }
531 
532     h = hash(hbuf, sizeof(hbuf), h);
533     len = 16;
534   }
535 
536   if (len > 8) {
537     __builtin_memset(hbuf, 0, sizeof(hbuf));
538 
539     switch (len) {
540     case 16:
541       __builtin_memcpy(hbuf, qhd->dcid + 8, 8);
542       break;
543     case 15:
544       __builtin_memcpy(hbuf, qhd->dcid + 8, 7);
545       break;
546     case 14:
547       __builtin_memcpy(hbuf, qhd->dcid + 8, 6);
548       break;
549     case 13:
550       __builtin_memcpy(hbuf, qhd->dcid + 8, 5);
551       break;
552     case 12:
553       __builtin_memcpy(hbuf, qhd->dcid + 8, 4);
554       break;
555     case 11:
556       __builtin_memcpy(hbuf, qhd->dcid + 8, 3);
557       break;
558     case 10:
559       __builtin_memcpy(hbuf, qhd->dcid + 8, 2);
560       break;
561     case 9:
562       __builtin_memcpy(hbuf, qhd->dcid + 8, 1);
563       break;
564     }
565 
566     h = hash(hbuf, sizeof(hbuf), h);
567     len = 8;
568   }
569 
570   return hash(qhd->dcid, len, h) % num_socks;
571 }
572 
/* sk_reuseport program: picks the socket in reuseport_array that
 * should receive an incoming QUIC datagram.
 *
 * It loads the first 6 + MAX_DCIDLEN bytes after the UDP header,
 * parses the QUIC header, decrypts 16 bytes of the DCID (starting at
 * CID_PREFIX_OFFSET) in place with the AES key stored in sk_info, and
 * looks the decrypted prefix up in cid_prefix_map.  On a hit the
 * mapped value is the socket index; on a miss the index is derived by
 * hashing the DCID with sk_index_from_dcid().
 *
 * Returns SK_PASS when a socket was selected and SK_DROP on any
 * failure (bytes cannot be loaded, missing map entry, unparsable or
 * unsupported packet, socket selection failure).
 */
SEC("sk_reuseport")
int select_reuseport(struct sk_reuseport_md *reuse_md) {
  __u32 sk_index, *psk_index;
  __u64 *pnum_socks, *pkey;
  __u32 zero = 0, key_high_idx = 1, key_low_idx = 2;
  int rv;
  quic_hd qhd;
  __u8 qpktbuf[6 + MAX_DCIDLEN];
  struct AES_ctx aes_ctx;
  __u8 key[AES_KEYLEN];
  __u8 *cid_prefix;

  /* Copy the start of the UDP payload onto the stack; this fails, and
     the packet is dropped, if the bytes cannot be loaded. */
  if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr), qpktbuf,
                         sizeof(qpktbuf)) != 0) {
    return SK_DROP;
  }

  /* sk_info[0]: number of sockets. */
  pnum_socks = bpf_map_lookup_elem(&sk_info, &zero);
  if (pnum_socks == NULL) {
    return SK_DROP;
  }

  /* sk_info[1] and sk_info[2]: the two 8-byte halves of the AES
     key. */
  pkey = bpf_map_lookup_elem(&sk_info, &key_high_idx);
  if (pkey == NULL) {
    return SK_DROP;
  }

  __builtin_memcpy(key, pkey, sizeof(*pkey));

  pkey = bpf_map_lookup_elem(&sk_info, &key_low_idx);
  if (pkey == NULL) {
    return SK_DROP;
  }

  __builtin_memcpy(key + sizeof(*pkey), pkey, sizeof(*pkey));

  rv = parse_quic(&qhd, qpktbuf, qpktbuf + sizeof(qpktbuf));
  if (rv != 0) {
    return SK_DROP;
  }

  AES_init_ctx(&aes_ctx, key);

  switch (qhd.type) {
  case NGTCP2_PKT_INITIAL:
  case NGTCP2_PKT_0RTT:
    /* The prefix lookup is only attempted for a DCID of SV_DCIDLEN
       bytes; any other length goes straight to hashing. */
    if (qhd.dcidlen == SV_DCIDLEN) {
      cid_prefix = qhd.dcid + CID_PREFIX_OFFSET;
      /* Decrypts in place: from here on qpktbuf holds the decrypted
         bytes, and the hash fallback below operates on them. */
      AES_ECB_decrypt(&aes_ctx, cid_prefix);

      psk_index = bpf_map_lookup_elem(&cid_prefix_map, cid_prefix);
      if (psk_index != NULL) {
        sk_index = *psk_index;

        break;
      }
    }

    sk_index = sk_index_from_dcid(&qhd, reuse_md, *pnum_socks);

    break;
  case NGTCP2_PKT_HANDSHAKE:
  case NGTCP2_PKT_SHORT:
    /* These packet types are dropped unless the DCID is SV_DCIDLEN
       bytes long. */
    if (qhd.dcidlen != SV_DCIDLEN) {
      return SK_DROP;
    }

    cid_prefix = qhd.dcid + CID_PREFIX_OFFSET;
    /* In-place decryption, as above. */
    AES_ECB_decrypt(&aes_ctx, cid_prefix);

    psk_index = bpf_map_lookup_elem(&cid_prefix_map, cid_prefix);
    if (psk_index == NULL) {
      /* Unknown prefix: fall back to hashing the DCID. */
      sk_index = sk_index_from_dcid(&qhd, reuse_md, *pnum_socks);

      break;
    }

    sk_index = *psk_index;

    break;
  default:
    /* Any other long header packet type is dropped. */
    return SK_DROP;
  }

  rv = bpf_sk_select_reuseport(reuse_md, &reuseport_array, &sk_index, 0);
  if (rv != 0) {
    return SK_DROP;
  }

  return SK_PASS;
}
664