1 /* Copyright (c) 2014, Google Inc.
2 *
3 * Permission to use, copy, modify, and/or distribute this software for any
4 * purpose with or without fee is hereby granted, provided that the above
5 * copyright notice and this permission notice appear in all copies.
6 *
7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
15 // This implementation of poly1305 is by Andrew Moon
16 // (https://github.com/floodyberry/poly1305-donna) and released as public
17 // domain.
18
19 #include <GFp/poly1305.h>
20
21 #include "internal.h"
22 #include "../internal.h"
23
24
25 #if !defined(BORINGSSL_HAS_UINT128) || !defined(OPENSSL_X86_64)
26
27 #if defined(__GNUC__)
28 #pragma GCC diagnostic ignored "-Wsign-conversion"
29 #pragma GCC diagnostic ignored "-Wconversion"
30 #endif
31
32 // We can assume little-endian.
// U8TO32_LE returns the 32-bit value stored in little-endian order in
// |m[0..3]|. Although this file targets little-endian hosts, the bytes are
// combined explicitly so that the result does not depend on the host byte
// order, and |m| needs no particular alignment.
static uint32_t U8TO32_LE(const uint8_t *m) {
  return (uint32_t)m[0] | ((uint32_t)m[1] << 8) | ((uint32_t)m[2] << 16) |
         ((uint32_t)m[3] << 24);
}
38
// U32TO8_LE writes |v| to |m[0..3]| in little-endian order. Although this file
// targets little-endian hosts, each byte is extracted explicitly so that the
// output does not depend on the host byte order, and |m| needs no particular
// alignment.
static void U32TO8_LE(uint8_t *m, uint32_t v) {
  m[0] = (uint8_t)(v & 0xff);
  m[1] = (uint8_t)((v >> 8) & 0xff);
  m[2] = (uint8_t)((v >> 16) & 0xff);
  m[3] = (uint8_t)((v >> 24) & 0xff);
}
42
// mul32x32_64 returns the full 64-bit product of two 32-bit values. The first
// operand is widened before multiplying so that no high bits are lost.
static uint64_t mul32x32_64(uint32_t a, uint32_t b) {
  const uint64_t wide_a = a;
  return wide_a * b;
}
44
// poly1305_state_st is the private working state kept inside the caller's
// opaque |poly1305_state| storage.
struct poly1305_state_st {
  // Multiplier limbs, taken from key bytes 0..15 and masked to at most 26 bits
  // each by GFp_poly1305_init.
  uint32_t r0;
  uint32_t r1;
  uint32_t r2;
  uint32_t r3;
  uint32_t r4;

  // Precomputed 5 * r1 .. 5 * r4, used for the wrapped-around product terms.
  uint32_t s1;
  uint32_t s2;
  uint32_t s3;
  uint32_t s4;

  // Accumulator limbs; masked back to 26 bits after every block.
  uint32_t h0;
  uint32_t h1;
  uint32_t h2;
  uint32_t h3;
  uint32_t h4;

  // Input bytes that do not yet form a complete 16-byte block.
  uint8_t buf[16];
  // Number of valid bytes at the start of |buf| (0..15 between calls).
  size_t buf_used;

  // Key bytes 16..31, added to the accumulator when the tag is produced.
  uint8_t key[16];
};
53
54 OPENSSL_STATIC_ASSERT(sizeof(struct poly1305_state_st) <= sizeof(poly1305_state),
55 "poly1305_state isn't large enough to hold aligned poly1305_state_st");
56
// poly1305_aligned_state returns the private state struct stored inside the
// caller-provided |state|. The address is asserted to be 64-byte aligned and
// is then rounded up to a multiple of 64; the rounding leaves the address
// unchanged whenever the asserted condition holds.
static inline struct poly1305_state_st *poly1305_aligned_state(
    poly1305_state *state) {
  const uintptr_t align_mask = 63;
  const uintptr_t addr = (uintptr_t)state;

  dev_assert_secret((addr & align_mask) == 0);
  return (struct poly1305_state_st *)((addr + align_mask) & ~align_mask);
}
62
63 // poly1305_blocks updates |state| given some amount of input data. This
64 // function may only be called with a |len| that is not a multiple of 16 at the
65 // end of the data. Otherwise the input must be buffered into 16 byte blocks.
poly1305_update(struct poly1305_state_st * state,const uint8_t * in,size_t len)66 static void poly1305_update(struct poly1305_state_st *state, const uint8_t *in,
67 size_t len) {
68 uint32_t t0, t1, t2, t3;
69 uint64_t t[5];
70 uint32_t b;
71 uint64_t c;
72 size_t j;
73 uint8_t mp[16];
74
75 if (len < 16) {
76 goto poly1305_donna_atmost15bytes;
77 }
78
79 poly1305_donna_16bytes:
80 t0 = U8TO32_LE(in);
81 t1 = U8TO32_LE(in + 4);
82 t2 = U8TO32_LE(in + 8);
83 t3 = U8TO32_LE(in + 12);
84
85 in += 16;
86 len -= 16;
87
88 state->h0 += t0 & 0x3ffffff;
89 state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
90 state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
91 state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
92 state->h4 += (t3 >> 8) | (1 << 24);
93
94 poly1305_donna_mul:
95 t[0] = mul32x32_64(state->h0, state->r0) + mul32x32_64(state->h1, state->s4) +
96 mul32x32_64(state->h2, state->s3) + mul32x32_64(state->h3, state->s2) +
97 mul32x32_64(state->h4, state->s1);
98 t[1] = mul32x32_64(state->h0, state->r1) + mul32x32_64(state->h1, state->r0) +
99 mul32x32_64(state->h2, state->s4) + mul32x32_64(state->h3, state->s3) +
100 mul32x32_64(state->h4, state->s2);
101 t[2] = mul32x32_64(state->h0, state->r2) + mul32x32_64(state->h1, state->r1) +
102 mul32x32_64(state->h2, state->r0) + mul32x32_64(state->h3, state->s4) +
103 mul32x32_64(state->h4, state->s3);
104 t[3] = mul32x32_64(state->h0, state->r3) + mul32x32_64(state->h1, state->r2) +
105 mul32x32_64(state->h2, state->r1) + mul32x32_64(state->h3, state->r0) +
106 mul32x32_64(state->h4, state->s4);
107 t[4] = mul32x32_64(state->h0, state->r4) + mul32x32_64(state->h1, state->r3) +
108 mul32x32_64(state->h2, state->r2) + mul32x32_64(state->h3, state->r1) +
109 mul32x32_64(state->h4, state->r0);
110
111 state->h0 = (uint32_t)t[0] & 0x3ffffff;
112 c = (t[0] >> 26);
113 t[1] += c;
114 state->h1 = (uint32_t)t[1] & 0x3ffffff;
115 b = (uint32_t)(t[1] >> 26);
116 t[2] += b;
117 state->h2 = (uint32_t)t[2] & 0x3ffffff;
118 b = (uint32_t)(t[2] >> 26);
119 t[3] += b;
120 state->h3 = (uint32_t)t[3] & 0x3ffffff;
121 b = (uint32_t)(t[3] >> 26);
122 t[4] += b;
123 state->h4 = (uint32_t)t[4] & 0x3ffffff;
124 b = (uint32_t)(t[4] >> 26);
125 state->h0 += b * 5;
126
127 if (len >= 16) {
128 goto poly1305_donna_16bytes;
129 }
130
131 // final bytes
132 poly1305_donna_atmost15bytes:
133 if (!len) {
134 return;
135 }
136
137 for (j = 0; j < len; j++) {
138 mp[j] = in[j];
139 }
140 mp[j++] = 1;
141 for (; j < 16; j++) {
142 mp[j] = 0;
143 }
144 len = 0;
145
146 t0 = U8TO32_LE(mp + 0);
147 t1 = U8TO32_LE(mp + 4);
148 t2 = U8TO32_LE(mp + 8);
149 t3 = U8TO32_LE(mp + 12);
150
151 state->h0 += t0 & 0x3ffffff;
152 state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
153 state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
154 state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
155 state->h4 += (t3 >> 8);
156
157 goto poly1305_donna_mul;
158 }
159
// GFp_poly1305_init prepares |statep| for a new computation with the 32-byte
// |key|. Key bytes 0..15 are split into the five masked multiplier limbs
// r0..r4, and s1..s4 are set to five times r1..r4. The accumulator and the
// partial-block buffer are cleared. Key bytes 16..31 are saved in the state
// for use by GFp_poly1305_finish.
void GFp_poly1305_init(poly1305_state *statep, const uint8_t key[32]) {
  struct poly1305_state_st *st = poly1305_aligned_state(statep);

  const uint32_t k0 = U8TO32_LE(key + 0);
  const uint32_t k1 = U8TO32_LE(key + 4);
  const uint32_t k2 = U8TO32_LE(key + 8);
  const uint32_t k3 = U8TO32_LE(key + 12);

  // Multiplier limbs: consecutive 26-bit windows of the first 128 key bits,
  // each with its own mask applied.
  st->r0 = k0 & 0x3ffffff;
  st->r1 = ((k0 >> 26) | (k1 << 6)) & 0x3ffff03;
  st->r2 = ((k1 >> 20) | (k2 << 12)) & 0x3ffc0ff;
  st->r3 = ((k2 >> 14) | (k3 << 18)) & 0x3f03fff;
  st->r4 = (k3 >> 8) & 0x00fffff;

  // Precomputed multiples used for the wrapped-around product terms.
  st->s1 = st->r1 * 5;
  st->s2 = st->r2 * 5;
  st->s3 = st->r3 * 5;
  st->s4 = st->r4 * 5;

  // Empty accumulator and empty partial-block buffer.
  st->h0 = 0;
  st->h1 = 0;
  st->h2 = 0;
  st->h3 = 0;
  st->h4 = 0;
  st->buf_used = 0;

  // Second half of the key, added to the accumulator when the tag is emitted.
  GFp_memcpy(st->key, key + 16, sizeof(st->key));
}
198
// GFp_poly1305_update feeds |in_len| bytes from |in| into the computation held
// in |statep|. Input may arrive in pieces of any size: bytes that do not yet
// complete a 16-byte block are kept in the state's buffer and processed once
// enough data has arrived (or when GFp_poly1305_finish is called).
void GFp_poly1305_update(poly1305_state *statep, const uint8_t *in,
                         size_t in_len) {
  struct poly1305_state_st *st = poly1305_aligned_state(statep);

  // First top up a partially filled buffer left by an earlier call.
  if (st->buf_used != 0) {
    const size_t room = 16 - st->buf_used;
    const size_t take = (in_len < room) ? in_len : room;
    uint8_t *dst = st->buf + st->buf_used;

    for (size_t k = 0; k != take; k++) {
      dst[k] = in[k];
    }
    st->buf_used += take;
    in += take;
    in_len -= take;

    if (st->buf_used == 16) {
      poly1305_update(st, st->buf, 16);
      st->buf_used = 0;
    }
  }

  // Process every complete block straight from the caller's memory.
  const size_t tail = in_len & 0xf;
  const size_t whole = in_len - tail;
  if (whole != 0) {
    poly1305_update(st, in, whole);
    in += whole;
  }

  // Stash whatever is left (fewer than 16 bytes) for a later call.
  if (tail != 0) {
    for (size_t k = 0; k != tail; k++) {
      st->buf[k] = in[k];
    }
    st->buf_used = tail;
  }
}
235
// GFp_poly1305_finish completes the computation held in |statep| and writes
// the 16-byte tag to |mac|. Any buffered partial block is processed first. The
// accumulator is then fully carried, conditionally replaced by its value minus
// the modulus using a branch-free mask, repacked into four 32-bit words, and
// added to the saved second half of the key with carries between words. The
// reduced accumulator limbs are written back to the state.
void GFp_poly1305_finish(poly1305_state *statep, uint8_t mac[16]) {
  struct poly1305_state_st *st = poly1305_aligned_state(statep);

  if (st->buf_used) {
    poly1305_update(st, st->buf, st->buf_used);
  }

  uint32_t h0 = st->h0;
  uint32_t h1 = st->h1;
  uint32_t h2 = st->h2;
  uint32_t h3 = st->h3;
  uint32_t h4 = st->h4;
  uint32_t carry;

  // Full carry pass so that every limb fits in 26 bits; the carry out of the
  // top limb wraps around into h0 multiplied by 5.
  carry = h0 >> 26;
  h0 &= 0x3ffffff;
  h1 += carry;
  carry = h1 >> 26;
  h1 &= 0x3ffffff;
  h2 += carry;
  carry = h2 >> 26;
  h2 &= 0x3ffffff;
  h3 += carry;
  carry = h3 >> 26;
  h3 &= 0x3ffffff;
  h4 += carry;
  carry = h4 >> 26;
  h4 &= 0x3ffffff;
  h0 += carry * 5;

  // g = h + 5 - 2^130, computed limb by limb. The top bit of g4 is set exactly
  // when the subtraction borrowed.
  uint32_t g0 = h0 + 5;
  carry = g0 >> 26;
  g0 &= 0x3ffffff;
  uint32_t g1 = h1 + carry;
  carry = g1 >> 26;
  g1 &= 0x3ffffff;
  uint32_t g2 = h2 + carry;
  carry = g2 >> 26;
  g2 &= 0x3ffffff;
  uint32_t g3 = h3 + carry;
  carry = g3 >> 26;
  g3 &= 0x3ffffff;
  uint32_t g4 = h4 + carry - ((uint32_t)1 << 26);

  // Branch-free select: |take_g| is all ones when there was no borrow (use g)
  // and zero when there was one (keep h).
  const uint32_t take_g = (g4 >> 31) - 1;
  const uint32_t keep_h = ~take_g;
  h0 = (h0 & keep_h) | (g0 & take_g);
  h1 = (h1 & keep_h) | (g1 & take_g);
  h2 = (h2 & keep_h) | (g2 & take_g);
  h3 = (h3 & keep_h) | (g3 & take_g);
  h4 = (h4 & keep_h) | (g4 & take_g);

  st->h0 = h0;
  st->h1 = h1;
  st->h2 = h2;
  st->h3 = h3;
  st->h4 = h4;

  // Repack the 26-bit limbs into 32-bit words and add the saved key half. The
  // sums are kept in 64 bits so that the carry between words is preserved.
  uint64_t f0 = (h0 | (h1 << 26)) + (uint64_t)U8TO32_LE(&st->key[0]);
  uint64_t f1 = ((h1 >> 6) | (h2 << 20)) + (uint64_t)U8TO32_LE(&st->key[4]);
  uint64_t f2 = ((h2 >> 12) | (h3 << 14)) + (uint64_t)U8TO32_LE(&st->key[8]);
  uint64_t f3 = ((h3 >> 18) | (h4 << 8)) + (uint64_t)U8TO32_LE(&st->key[12]);

  U32TO8_LE(&mac[0], (uint32_t)f0);
  f1 += f0 >> 32;
  U32TO8_LE(&mac[4], (uint32_t)f1);
  f2 += f1 >> 32;
  U32TO8_LE(&mac[8], (uint32_t)f2);
  f3 += f2 >> 32;
  U32TO8_LE(&mac[12], (uint32_t)f3);
}
300
301 #endif // !BORINGSSL_HAS_UINT128 || !OPENSSL_X86_64
302