1 /* OPENBSD ORIGINAL: lib/libc/crypt/chacha_private.h */
2
3 /*
4 chacha-merged.c version 20080118
5 D. J. Bernstein
6 Public domain.
7 */
8
9 /* $OpenBSD: chacha_private.h,v 1.3 2022/02/28 21:56:29 dtucker Exp $ */
10
/* Short integer aliases used by the cipher code below. */
typedef unsigned char u8;
typedef unsigned int u32;	/* used as a 32-bit word; U32V() masks to 32 bits */
typedef unsigned int u_int;
typedef unsigned char u_char;

/*
 * Cipher state, sixteen 32-bit words:
 *   input[0..3]   constants ("expand NN-byte k")
 *   input[4..11]  key material
 *   input[12..13] block counter, low word first
 *   input[14..15] IV
 */
typedef struct
{
	u32 input[16]; /* could be compressed */
} chacha_ctx;

/* Turn an integer literal into an unsigned one by appending the U suffix. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Reduce a value to its low 8 / low 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.  n must stay in 1..31: the
 * right shift by (32 - n) is not defined for n == 0.  Both arguments are
 * evaluated more than once.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/*
 * Assemble a 32-bit word from four bytes at p, least significant byte
 * first.  Each byte goes through u8 before widening so that a pointer to
 * plain (possibly signed) char, as used for the constant strings, can
 * never sign-extend into the upper bits of the result.
 */
#define U8TO32_LITTLE(p) \
  (((u32)(u8)((p)[0])      ) | \
   ((u32)(u8)((p)[1]) <<  8) | \
   ((u32)(u8)((p)[2]) << 16) | \
   ((u32)(u8)((p)[3]) << 24))

/* Store the 32-bit word v into four bytes at p, least significant first. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* Word-level primitives; every result is reduced to 32 bits. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round over four state words, updated in place.  The
 * expansion is a plain statement list that already ends in ';', so it is
 * invoked without a trailing semicolon and must not be used as the
 * unbraced body of an if/else or loop.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * State constants for 256-bit (sigma) and 128-bit (tau) keys.  Each
 * literal is exactly 16 characters, so the arrays carry no terminating
 * NUL and must only be read as 16 raw bytes.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
57
58 static void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)59 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
60 {
61 const char *constants;
62
63 x->input[4] = U8TO32_LITTLE(k + 0);
64 x->input[5] = U8TO32_LITTLE(k + 4);
65 x->input[6] = U8TO32_LITTLE(k + 8);
66 x->input[7] = U8TO32_LITTLE(k + 12);
67 if (kbits == 256) { /* recommended */
68 k += 16;
69 constants = sigma;
70 } else { /* kbits == 128 */
71 constants = tau;
72 }
73 x->input[8] = U8TO32_LITTLE(k + 0);
74 x->input[9] = U8TO32_LITTLE(k + 4);
75 x->input[10] = U8TO32_LITTLE(k + 8);
76 x->input[11] = U8TO32_LITTLE(k + 12);
77 x->input[0] = U8TO32_LITTLE(constants + 0);
78 x->input[1] = U8TO32_LITTLE(constants + 4);
79 x->input[2] = U8TO32_LITTLE(constants + 8);
80 x->input[3] = U8TO32_LITTLE(constants + 12);
81 }
82
83 static void
chacha_ivsetup(chacha_ctx * x,const u8 * iv)84 chacha_ivsetup(chacha_ctx *x,const u8 *iv)
85 {
86 x->input[12] = 0;
87 x->input[13] = 0;
88 x->input[14] = U8TO32_LITTLE(iv + 0);
89 x->input[15] = U8TO32_LITTLE(iv + 4);
90 }
91
92 static void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)93 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
94 {
95 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
96 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
97 u8 *ctarget = NULL;
98 u8 tmp[64];
99 u_int i;
100
101 if (!bytes) return;
102
103 j0 = x->input[0];
104 j1 = x->input[1];
105 j2 = x->input[2];
106 j3 = x->input[3];
107 j4 = x->input[4];
108 j5 = x->input[5];
109 j6 = x->input[6];
110 j7 = x->input[7];
111 j8 = x->input[8];
112 j9 = x->input[9];
113 j10 = x->input[10];
114 j11 = x->input[11];
115 j12 = x->input[12];
116 j13 = x->input[13];
117 j14 = x->input[14];
118 j15 = x->input[15];
119
120 for (;;) {
121 if (bytes < 64) {
122 for (i = 0;i < bytes;++i) tmp[i] = m[i];
123 m = tmp;
124 ctarget = c;
125 c = tmp;
126 }
127 x0 = j0;
128 x1 = j1;
129 x2 = j2;
130 x3 = j3;
131 x4 = j4;
132 x5 = j5;
133 x6 = j6;
134 x7 = j7;
135 x8 = j8;
136 x9 = j9;
137 x10 = j10;
138 x11 = j11;
139 x12 = j12;
140 x13 = j13;
141 x14 = j14;
142 x15 = j15;
143 for (i = 20;i > 0;i -= 2) {
144 QUARTERROUND( x0, x4, x8,x12)
145 QUARTERROUND( x1, x5, x9,x13)
146 QUARTERROUND( x2, x6,x10,x14)
147 QUARTERROUND( x3, x7,x11,x15)
148 QUARTERROUND( x0, x5,x10,x15)
149 QUARTERROUND( x1, x6,x11,x12)
150 QUARTERROUND( x2, x7, x8,x13)
151 QUARTERROUND( x3, x4, x9,x14)
152 }
153 x0 = PLUS(x0,j0);
154 x1 = PLUS(x1,j1);
155 x2 = PLUS(x2,j2);
156 x3 = PLUS(x3,j3);
157 x4 = PLUS(x4,j4);
158 x5 = PLUS(x5,j5);
159 x6 = PLUS(x6,j6);
160 x7 = PLUS(x7,j7);
161 x8 = PLUS(x8,j8);
162 x9 = PLUS(x9,j9);
163 x10 = PLUS(x10,j10);
164 x11 = PLUS(x11,j11);
165 x12 = PLUS(x12,j12);
166 x13 = PLUS(x13,j13);
167 x14 = PLUS(x14,j14);
168 x15 = PLUS(x15,j15);
169
170 #ifndef KEYSTREAM_ONLY
171 x0 = XOR(x0,U8TO32_LITTLE(m + 0));
172 x1 = XOR(x1,U8TO32_LITTLE(m + 4));
173 x2 = XOR(x2,U8TO32_LITTLE(m + 8));
174 x3 = XOR(x3,U8TO32_LITTLE(m + 12));
175 x4 = XOR(x4,U8TO32_LITTLE(m + 16));
176 x5 = XOR(x5,U8TO32_LITTLE(m + 20));
177 x6 = XOR(x6,U8TO32_LITTLE(m + 24));
178 x7 = XOR(x7,U8TO32_LITTLE(m + 28));
179 x8 = XOR(x8,U8TO32_LITTLE(m + 32));
180 x9 = XOR(x9,U8TO32_LITTLE(m + 36));
181 x10 = XOR(x10,U8TO32_LITTLE(m + 40));
182 x11 = XOR(x11,U8TO32_LITTLE(m + 44));
183 x12 = XOR(x12,U8TO32_LITTLE(m + 48));
184 x13 = XOR(x13,U8TO32_LITTLE(m + 52));
185 x14 = XOR(x14,U8TO32_LITTLE(m + 56));
186 x15 = XOR(x15,U8TO32_LITTLE(m + 60));
187 #endif
188
189 j12 = PLUSONE(j12);
190 if (!j12) {
191 j13 = PLUSONE(j13);
192 /* stopping at 2^70 bytes per nonce is user's responsibility */
193 }
194
195 U32TO8_LITTLE(c + 0,x0);
196 U32TO8_LITTLE(c + 4,x1);
197 U32TO8_LITTLE(c + 8,x2);
198 U32TO8_LITTLE(c + 12,x3);
199 U32TO8_LITTLE(c + 16,x4);
200 U32TO8_LITTLE(c + 20,x5);
201 U32TO8_LITTLE(c + 24,x6);
202 U32TO8_LITTLE(c + 28,x7);
203 U32TO8_LITTLE(c + 32,x8);
204 U32TO8_LITTLE(c + 36,x9);
205 U32TO8_LITTLE(c + 40,x10);
206 U32TO8_LITTLE(c + 44,x11);
207 U32TO8_LITTLE(c + 48,x12);
208 U32TO8_LITTLE(c + 52,x13);
209 U32TO8_LITTLE(c + 56,x14);
210 U32TO8_LITTLE(c + 60,x15);
211
212 if (bytes <= 64) {
213 if (bytes < 64) {
214 for (i = 0;i < bytes;++i) ctarget[i] = c[i];
215 }
216 x->input[12] = j12;
217 x->input[13] = j13;
218 return;
219 }
220 bytes -= 64;
221 c += 64;
222 #ifndef KEYSTREAM_ONLY
223 m += 64;
224 #endif
225 }
226 }
227