1 /*
2 chacha-merged.c version 20080118
3 D. J. Bernstein
4 Public domain.
5 */
6 
7 #include <libwebsockets.h>
8 #include "lws-ssh.h"
9 
10 #include <string.h>
11 #include <stdlib.h>
12 
/*
 * ChaCha cipher state: sixteen 32-bit words.
 *
 * As filled in by chacha_keysetup() and chacha_ivsetup() in this file:
 *   input[0..3]    constant words (sigma or tau)
 *   input[4..11]   key words
 *   input[12..13]  64-bit block counter, low word first
 *   input[14..15]  nonce / IV words
 */
struct chacha_ctx {
	u_int input[16];
};
16 
/*
 * Size constants, in bytes.  None of them is referenced by the code visible
 * in this file (CHACHA_BLOCKLEN only appears inside a comment); the values
 * match what the routines below actually consume: chacha_ivsetup() reads
 * 8 bytes of IV and 8 bytes of counter, and chacha_encrypt_bytes() works in
 * 64-byte blocks.  16 bytes is the shorter (128-bit) key size handled by
 * chacha_keysetup().
 */
#define CHACHA_MINKEYLEN 	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64
22 
/*
 * Short integer aliases used by the cipher core.  The macros below mask
 * results to 8 / 32 bits, so u32 only has to be at least 32 bits wide.
 */
typedef unsigned char u8;
typedef unsigned int u32;

/* lets the functions below write "chacha_ctx" without the struct keyword */
typedef struct chacha_ctx chacha_ctx;
27 
/* Append the unsigned suffix to an integer literal */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to its low 8 / 32 bits */
#define U8V(v) ((u8)((v) & U8C(0xFF)))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.  n must be in 1..31 so that
 * neither shift count reaches the width of the type; the callers below only
 * use 16, 12, 8 and 7.  Both arguments are evaluated more than once.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from the four bytes at p, least significant first */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/* Store the 32-bit word v into the four bytes at p, least significant first */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* Word-level primitives; PLUS wraps modulo 2^32 via U32V */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One add / xor / rotate quarter-round applied in place to four state words.
 *
 * This expands to a sequence of statements that already ends in ';' and is
 * NOT wrapped in do { } while (0): it is invoked without a trailing
 * semicolon, must only be used where several statements are allowed (e.g.
 * inside a braced block), and its arguments must be plain lvalues because
 * each one is both read and assigned several times.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
61 
/*
 * Constant words loaded into input[0..3] by chacha_keysetup(): sigma for
 * 256-bit keys, tau otherwise.  Each literal is exactly 16 characters, so
 * the arrays hold no terminating NUL and must not be used as C strings.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
64 
65 void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)66 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
67 {
68   const char *constants;
69 
70   x->input[4] = U8TO32_LITTLE(k + 0);
71   x->input[5] = U8TO32_LITTLE(k + 4);
72   x->input[6] = U8TO32_LITTLE(k + 8);
73   x->input[7] = U8TO32_LITTLE(k + 12);
74   if (kbits == 256) { /* recommended */
75     k += 16;
76     constants = sigma;
77   } else { /* kbits == 128 */
78     constants = tau;
79   }
80   x->input[8] = U8TO32_LITTLE(k + 0);
81   x->input[9] = U8TO32_LITTLE(k + 4);
82   x->input[10] = U8TO32_LITTLE(k + 8);
83   x->input[11] = U8TO32_LITTLE(k + 12);
84   x->input[0] = U8TO32_LITTLE(constants + 0);
85   x->input[1] = U8TO32_LITTLE(constants + 4);
86   x->input[2] = U8TO32_LITTLE(constants + 8);
87   x->input[3] = U8TO32_LITTLE(constants + 12);
88 }
89 
90 void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)91 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
92 {
93   x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
94   x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
95   x->input[14] = U8TO32_LITTLE(iv + 0);
96   x->input[15] = U8TO32_LITTLE(iv + 4);
97 }
98 
99 void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)100 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
101 {
102   u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
103   u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
104   u8 *ctarget = NULL;
105   u8 tmp[64];
106   u_int i;
107 
108   if (!bytes) return;
109 
110   j0 = x->input[0];
111   j1 = x->input[1];
112   j2 = x->input[2];
113   j3 = x->input[3];
114   j4 = x->input[4];
115   j5 = x->input[5];
116   j6 = x->input[6];
117   j7 = x->input[7];
118   j8 = x->input[8];
119   j9 = x->input[9];
120   j10 = x->input[10];
121   j11 = x->input[11];
122   j12 = x->input[12];
123   j13 = x->input[13];
124   j14 = x->input[14];
125   j15 = x->input[15];
126 
127   for (;;) {
128     if (bytes < 64) {
129       for (i = 0;i < bytes;++i) tmp[i] = m[i];
130       m = tmp;
131       ctarget = c;
132       c = tmp;
133     }
134     x0 = j0;
135     x1 = j1;
136     x2 = j2;
137     x3 = j3;
138     x4 = j4;
139     x5 = j5;
140     x6 = j6;
141     x7 = j7;
142     x8 = j8;
143     x9 = j9;
144     x10 = j10;
145     x11 = j11;
146     x12 = j12;
147     x13 = j13;
148     x14 = j14;
149     x15 = j15;
150     for (i = 20;i > 0;i -= 2) {
151       QUARTERROUND( x0, x4, x8,x12)
152       QUARTERROUND( x1, x5, x9,x13)
153       QUARTERROUND( x2, x6,x10,x14)
154       QUARTERROUND( x3, x7,x11,x15)
155       QUARTERROUND( x0, x5,x10,x15)
156       QUARTERROUND( x1, x6,x11,x12)
157       QUARTERROUND( x2, x7, x8,x13)
158       QUARTERROUND( x3, x4, x9,x14)
159     }
160     x0 = PLUS(x0,j0);
161     x1 = PLUS(x1,j1);
162     x2 = PLUS(x2,j2);
163     x3 = PLUS(x3,j3);
164     x4 = PLUS(x4,j4);
165     x5 = PLUS(x5,j5);
166     x6 = PLUS(x6,j6);
167     x7 = PLUS(x7,j7);
168     x8 = PLUS(x8,j8);
169     x9 = PLUS(x9,j9);
170     x10 = PLUS(x10,j10);
171     x11 = PLUS(x11,j11);
172     x12 = PLUS(x12,j12);
173     x13 = PLUS(x13,j13);
174     x14 = PLUS(x14,j14);
175     x15 = PLUS(x15,j15);
176 
177     x0 = XOR(x0,U8TO32_LITTLE(m + 0));
178     x1 = XOR(x1,U8TO32_LITTLE(m + 4));
179     x2 = XOR(x2,U8TO32_LITTLE(m + 8));
180     x3 = XOR(x3,U8TO32_LITTLE(m + 12));
181     x4 = XOR(x4,U8TO32_LITTLE(m + 16));
182     x5 = XOR(x5,U8TO32_LITTLE(m + 20));
183     x6 = XOR(x6,U8TO32_LITTLE(m + 24));
184     x7 = XOR(x7,U8TO32_LITTLE(m + 28));
185     x8 = XOR(x8,U8TO32_LITTLE(m + 32));
186     x9 = XOR(x9,U8TO32_LITTLE(m + 36));
187     x10 = XOR(x10,U8TO32_LITTLE(m + 40));
188     x11 = XOR(x11,U8TO32_LITTLE(m + 44));
189     x12 = XOR(x12,U8TO32_LITTLE(m + 48));
190     x13 = XOR(x13,U8TO32_LITTLE(m + 52));
191     x14 = XOR(x14,U8TO32_LITTLE(m + 56));
192     x15 = XOR(x15,U8TO32_LITTLE(m + 60));
193 
194     j12 = PLUSONE(j12);
195     if (!j12)
196       j13 = PLUSONE(j13);
197       /* stopping at 2^70 bytes per nonce is user's responsibility */
198 
199     U32TO8_LITTLE(c + 0,x0);
200     U32TO8_LITTLE(c + 4,x1);
201     U32TO8_LITTLE(c + 8,x2);
202     U32TO8_LITTLE(c + 12,x3);
203     U32TO8_LITTLE(c + 16,x4);
204     U32TO8_LITTLE(c + 20,x5);
205     U32TO8_LITTLE(c + 24,x6);
206     U32TO8_LITTLE(c + 28,x7);
207     U32TO8_LITTLE(c + 32,x8);
208     U32TO8_LITTLE(c + 36,x9);
209     U32TO8_LITTLE(c + 40,x10);
210     U32TO8_LITTLE(c + 44,x11);
211     U32TO8_LITTLE(c + 48,x12);
212     U32TO8_LITTLE(c + 52,x13);
213     U32TO8_LITTLE(c + 56,x14);
214     U32TO8_LITTLE(c + 60,x15);
215 
216     if (bytes <= 64) {
217       if (bytes < 64) {
218         for (i = 0;i < bytes;++i) ctarget[i] = c[i];
219       }
220       x->input[12] = j12;
221       x->input[13] = j13;
222       return;
223     }
224     bytes -= 64;
225     c += 64;
226     m += 64;
227   }
228 }
229 
/*
 * Per-direction cipher state hung off keys->cipher: two independent ChaCha
 * contexts.
 *
 *   ccctx[0] (K_1): used in this file for the 4-byte packet length / AAD
 *   ccctx[1] (K_2): used in this file for the Poly1305 key and the payload
 *
 * lws_chacha_activate() keys K_2 from the first 32 bytes of the encryption
 * key material and K_1 from the following 32 bytes.
 */
struct lws_cipher_chacha {
	struct chacha_ctx ccctx[2];
};

/*
 * Accessors yielding a chacha_ctx pointer from a struct lws_ssh_keys
 * pointer.  The expansion is not parenthesised and _keys is used bare, so
 * pass a simple pointer expression and use the result directly as a
 * function argument, as the code below does.
 */
#define K_1(_keys) &((struct lws_cipher_chacha *)_keys->cipher)->ccctx[0]
#define K_2(_keys) &((struct lws_cipher_chacha *)_keys->cipher)->ccctx[1]
236 
237 int
lws_chacha_activate(struct lws_ssh_keys * keys)238 lws_chacha_activate(struct lws_ssh_keys *keys)
239 {
240 	if (keys->cipher) {
241 		free(keys->cipher);
242 		keys->cipher = NULL;
243 	}
244 
245 	keys->cipher = malloc(sizeof(struct lws_cipher_chacha));
246 	if (!keys->cipher)
247 		return 1;
248 
249 	memset(keys->cipher, 0, sizeof(struct lws_cipher_chacha));
250 
251 	/* uses 2 x 256-bit keys, so 512 bits (64 bytes) needed */
252 	chacha_keysetup(K_2(keys), keys->key[SSH_KEYIDX_ENC], 256);
253 	chacha_keysetup(K_1(keys), &keys->key[SSH_KEYIDX_ENC][32], 256);
254 
255 	keys->valid = 1;
256 	keys->full_length = 1;
257 	keys->padding_alignment = 8; // CHACHA_BLOCKLEN;
258 	keys->MAC_length = POLY1305_TAGLEN;
259 
260 	return 0;
261 }
262 
263 void
lws_chacha_destroy(struct lws_ssh_keys * keys)264 lws_chacha_destroy(struct lws_ssh_keys *keys)
265 {
266 	if (keys->cipher) {
267 		free(keys->cipher);
268 		keys->cipher = NULL;
269 	}
270 }
271 
/*
 * Recover the plaintext packet length from its 4 encrypted bytes.
 *
 * keys: activated key set (keys->cipher must be valid)
 * seq:  packet sequence number, used as the nonce
 * in4:  the 4 ciphertext bytes of the length field
 *
 * Returns the decrypted length as read by PEEK_U32().
 */
uint32_t
lws_chachapoly_get_length(struct lws_ssh_keys *keys, uint32_t seq,
			  const uint8_t *in4)
{
	uint8_t nonce[8], plain[4];

	/*
	 * The length field has to be decrypted before the rest of the packet
	 * can be delimited.  It is protected by the K_1 instance, with the
	 * sequence number written out as a 64-bit value for the nonce and
	 * the block counter starting from zero.
	 */
	POKE_U64(nonce, seq);
	chacha_ivsetup(K_1(keys), nonce, NULL);
	chacha_encrypt_bytes(K_1(keys), in4, plain, sizeof(plain));

	return PEEK_U32(plain);
}
291 
292 /*
293  * chachapoly_crypt() operates as following:
294  * En/decrypt with header key 'aadlen' bytes from 'src', storing result
295  * to 'dest'. The ciphertext here is treated as additional authenticated
296  * data for MAC calculation.
297  * En/decrypt 'len' bytes at offset 'aadlen' from 'src' to 'dest'. Use
298  * POLY1305_TAGLEN bytes at offset 'len'+'aadlen' as the authentication
299  * tag. This tag is written on encryption and verified on decryption.
300  */
301 int
chachapoly_crypt(struct lws_ssh_keys * keys,u_int seqnr,u_char * dest,const u_char * src,u_int len,u_int aadlen,u_int authlen,int do_encrypt)302 chachapoly_crypt(struct lws_ssh_keys *keys, u_int seqnr, u_char *dest,
303     const u_char *src, u_int len, u_int aadlen, u_int authlen, int do_encrypt)
304 {
305         u_char seqbuf[8];
306         const u_char one[8] = { 1, 0, 0, 0, 0, 0, 0, 0 }; /* NB little-endian */
307         u_char expected_tag[POLY1305_TAGLEN], poly_key[POLY1305_KEYLEN];
308         int r = 1;
309 
310         /*
311          * Run ChaCha20 once to generate the Poly1305 key. The IV is the
312          * packet sequence number.
313          */
314         memset(poly_key, 0, sizeof(poly_key));
315         POKE_U64(seqbuf, seqnr);
316         chacha_ivsetup(K_2(keys), seqbuf, NULL);
317         chacha_encrypt_bytes(K_2(keys),
318             poly_key, poly_key, sizeof(poly_key));
319 
320         /* If decrypting, check tag before anything else */
321         if (!do_encrypt) {
322                 const u_char *tag = src + aadlen + len;
323 
324                 poly1305_auth(expected_tag, src, aadlen + len, poly_key);
325                 if (lws_timingsafe_bcmp(expected_tag, tag, POLY1305_TAGLEN)) {
326                         r = 2;
327                         goto out;
328                 }
329         }
330 
331         /* Crypt additional data */
332         if (aadlen) {
333                 chacha_ivsetup(K_1(keys), seqbuf, NULL);
334                 chacha_encrypt_bytes(K_1(keys), src, dest, aadlen);
335         }
336 
337         /* Set Chacha's block counter to 1 */
338         chacha_ivsetup(K_2(keys), seqbuf, one);
339         chacha_encrypt_bytes(K_2(keys), src + aadlen, dest + aadlen, len);
340 
341         /* If encrypting, calculate and append tag */
342         if (do_encrypt) {
343                 poly1305_auth(dest + aadlen + len, dest, aadlen + len,
344                     poly_key);
345         }
346         r = 0;
347  out:
348         lws_explicit_bzero(expected_tag, sizeof(expected_tag));
349         lws_explicit_bzero(seqbuf, sizeof(seqbuf));
350         lws_explicit_bzero(poly_key, sizeof(poly_key));
351         return r;
352 }
353 
354 int
lws_chacha_decrypt(struct lws_ssh_keys * keys,uint32_t seq,const uint8_t * ct,uint32_t len,uint8_t * pt)355 lws_chacha_decrypt(struct lws_ssh_keys *keys, uint32_t seq,
356 		   const uint8_t *ct, uint32_t len, uint8_t *pt)
357 {
358 	return chachapoly_crypt(keys, seq, pt, ct, len - POLY1305_TAGLEN - 4, 4,
359 			 POLY1305_TAGLEN, 0);
360 }
361 
/*
 * Encrypt one outgoing packet and append its authentication tag.
 *
 * keys: activated key set
 * seq:  packet sequence number
 * ct:   INPUT buffer (despite the name): 4-byte length field followed by
 *       the payload, passed to chachapoly_crypt() as its source
 * len:  number of bytes at ct, including the 4-byte length field
 * pt:   OUTPUT buffer (despite the name): receives len encrypted bytes
 *       followed by the tag that chachapoly_crypt() appends
 *
 * Returns 0 on success, 1 if len is smaller than the 4-byte length field.
 * That check prevents the unsigned payload-length computation from wrapping
 * around to a huge value.
 */
int
lws_chacha_encrypt(struct lws_ssh_keys *keys, uint32_t seq,
		   const uint8_t *ct, uint32_t len, uint8_t *pt)
{
	if (len < 4)
		return 1;

	return chachapoly_crypt(keys, seq, pt, ct, len - 4, 4, 0, 1);
}
368 
369