1 /*
2 BLAKE2 reference source code package - optimized C implementations
3
4 Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
5
6 To the extent possible under law, the author(s) have dedicated all copyright
7 and related and neighboring rights to this software to the public domain
8 worldwide. This software is distributed without any warranty.
9
10 You should have received a copy of the CC0 Public Domain Dedication along with
11 this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12 */
13
14 #include <stdint.h>
15 #include <string.h>
16 #include <stdio.h>
17
18 #include "blake2.h"
19 #include "blake2-impl.h"
20
21 #include "blake2-config.h"
22
23 #if defined(_MSC_VER)
24 #include <intrin.h>
25 #endif
26
27 #if defined(HAVE_SSE2)
28 #include <emmintrin.h>
29 // MSVC only defines _mm_set_epi64x for x86_64...
30 #if defined(_MSC_VER) && !defined(_M_X64)
_mm_set_epi64x(const uint64_t u1,const uint64_t u0)31 static inline __m128i _mm_set_epi64x( const uint64_t u1, const uint64_t u0 )
32 {
33 return _mm_set_epi32( u1 >> 32, u1, u0 >> 32, u0 );
34 }
35 #endif
36 #endif
37
38 #if defined(HAVE_SSSE3)
39 #include <tmmintrin.h>
40 #endif
41 #if defined(HAVE_SSE4_1)
42 #include <smmintrin.h>
43 #endif
44 #if defined(HAVE_AVX)
45 #include <immintrin.h>
46 #endif
47 #if defined(HAVE_XOP) && !defined(_MSC_VER)
48 #include <x86intrin.h>
49 #endif
50
51
52
53 #include "blake2b-round.h"
54
/* Initialization vector: eight 64-bit words. In this file they seed the chaining
   value h (blake2b_init0 / blake2b_init_param) and fill the lower half of the
   working state in blake2b_compress. */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL,
  0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL,
  0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL,
  0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL,
  0x5be0cd19137e2179ULL
};
62
/* Message-word permutation table: one row of 16 indices per round, 12 rows.
   Rows 10 and 11 repeat rows 0 and 1. The table is not referenced by the code
   visible in this file; NOTE(review): the SIMD round macros from
   blake2b-round.h presumably hard-code the same schedule -- confirm before
   removing. */
static const uint8_t blake2b_sigma[12][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0 },
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
78
79
80 /* Some helper functions, not necessarily useful */
/* Sets every bit of finalization flag f[1] (the "last node" flag).
   Always returns 0. */
static inline int blake2b_set_lastnode( blake2b_state *S )
{
  const unsigned long long all_ones = ~0ULL;

  S->f[1] = all_ones;
  return 0;
}
86
/* Resets finalization flag f[1] (the "last node" flag) to zero.
   Always returns 0. */
static inline int blake2b_clear_lastnode( blake2b_state *S )
{
  const unsigned long long cleared = 0ULL;

  S->f[1] = cleared;
  return 0;
}
92
/* Flags the state as processing its final block: sets every bit of f[0] and,
   when S->last_node is non-zero, also sets the last-node flag first.
   Always returns 0. */
static inline int blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    blake2b_set_lastnode( S );
  }

  S->f[0] = ~0ULL;
  return 0;
}
100
/* Undoes blake2b_set_lastblock: zeroes f[0] and, when S->last_node is
   non-zero, clears the last-node flag first. Always returns 0. */
static inline int blake2b_clear_lastblock( blake2b_state *S )
{
  if( S->last_node )
  {
    blake2b_clear_lastnode( S );
  }

  S->f[0] = 0ULL;
  return 0;
}
108
109
/* Adds inc to the two-word counter S->t, where t[0] is the low word and t[1]
   receives the carry. Always returns 0. */
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
  /* Do the addition on a single 128-bit value (ADD/ADC chain). */
  const __uint128_t before = ( ( __uint128_t )S->t[1] << 64 ) | S->t[0];
  const __uint128_t after = before + inc;

  S->t[0] = ( uint64_t )( after >> 0 );
  S->t[1] = ( uint64_t )( after >> 64 );
#else
  /* Portable path: the low word wrapped around iff the sum is below inc. */
  S->t[0] += inc;

  if( S->t[0] < inc )
  {
    S->t[1] += 1;
  }
#endif
  return 0;
}
124
125
126 // Parameter-related functions
/* Stores digest_length in the parameter block. No range validation is done
   here. Always returns 0. */
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
  blake2b_param *const params = P;

  params->digest_length = digest_length;
  return 0;
}
132
/* Stores fanout in the parameter block. Always returns 0. */
static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
  blake2b_param *const params = P;

  params->fanout = fanout;
  return 0;
}
138
/* Stores depth in the parameter block's depth field. Always returns 0. */
static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
  blake2b_param *const params = P;

  params->depth = depth;
  return 0;
}
144
/* Stores leaf_length in the parameter block as a plain assignment (no explicit
   byte-order conversion). Always returns 0. */
static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
  blake2b_param *const params = P;

  params->leaf_length = leaf_length;
  return 0;
}
150
/* Stores node_offset in the parameter block as a plain assignment (no explicit
   byte-order conversion). Always returns 0. */
static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
  blake2b_param *const params = P;

  params->node_offset = node_offset;
  return 0;
}
156
/* Stores node_depth in the parameter block. Always returns 0. */
static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
  blake2b_param *const params = P;

  params->node_depth = node_depth;
  return 0;
}
162
/* Stores inner_length in the parameter block. Always returns 0. */
static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
  blake2b_param *const params = P;

  params->inner_length = inner_length;
  return 0;
}
168
/* Copies exactly BLAKE2B_SALTBYTES bytes from salt into the parameter block.
   salt must point to at least that many readable bytes. Always returns 0. */
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
  const size_t salt_bytes = BLAKE2B_SALTBYTES;

  memcpy( P->salt, salt, salt_bytes );
  return 0;
}
174
/* Copies exactly BLAKE2B_PERSONALBYTES bytes from personal into the parameter
   block. personal must point to at least that many readable bytes.
   Always returns 0. */
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
  const size_t personal_bytes = BLAKE2B_PERSONALBYTES;

  memcpy( P->personal, personal, personal_bytes );
  return 0;
}
180
/* Resets the whole state to zero and then loads the eight IV words into the
   chaining value h. Always returns 0. */
static inline int blake2b_init0( blake2b_state *S )
{
  int word = 0;

  memset( S, 0, sizeof( *S ) );

  while( word < 8 )
  {
    S->h[word] = blake2b_IV[word];
    ++word;
  }

  return 0;
}
189
190
191
192 #if defined(__cplusplus)
193 extern "C" {
194 #endif
195 int blake2b_init( blake2b_state *S, size_t outlen );
196 int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
197 int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
198 int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen );
199 int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen );
200 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen );
201 #if defined(__cplusplus)
202 }
203 #endif
204
205 /* init xors IV with input parameter block */
blake2b_init_param(blake2b_state * S,const blake2b_param * P)206 int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
207 {
208 uint8_t *p, *h, *v;
209 //blake2b_init0( S );
210 v = ( uint8_t * )( blake2b_IV );
211 h = ( uint8_t * )( S->h );
212 p = ( uint8_t * )( P );
213 /* IV XOR ParamBlock */
214 memset( S, 0, sizeof( blake2b_state ) );
215
216 for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
217
218 S->outlen = P->digest_length;
219 return 0;
220 }
221
222
223 /* Some sort of default parameter block initialization, for sequential blake2b */
224
blake2b_init(blake2b_state * S,size_t outlen)225 int blake2b_init( blake2b_state *S, size_t outlen )
226 {
227 if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
228
229 const blake2b_param P =
230 {
231 ( uint8_t ) outlen,
232 0,
233 1,
234 1,
235 0,
236 0,
237 0,
238 0,
239 {0},
240 {0},
241 {0}
242 };
243 return blake2b_init_param( S, &P );
244 }
245
/* Keyed initialization.
   Sets up S for a digest of outlen bytes and absorbs the key as the first
   message block (the key zero-padded to BLAKE2B_BLOCKBYTES).

   outlen: digest size in bytes, 1..BLAKE2B_OUTBYTES.
   key:    pointer to keylen bytes of key material; must not be NULL.
   keylen: key size in bytes, 1..BLAKE2B_KEYBYTES.

   Returns 0 on success and -1 on any invalid argument or if a lower-level
   step reports failure. */
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  /* A NULL key would be dereferenced by the memcpy below. */
  if ( ( NULL == key ) || ( !keylen ) || ( keylen > BLAKE2B_KEYBYTES ) ) return -1;

  /* Positional initializer; the field order is defined by blake2b_param in
     blake2.h. Same values as blake2b_init, plus the key length. */
  const blake2b_param P =
  {
    ( uint8_t ) outlen,
    ( uint8_t ) keylen,
    1,
    1,
    0,
    0,
    0,
    0,
    {0},
    {0},
    {0}
  };

  /* Propagate failure as an error instead of reporting success. */
  if( blake2b_init_param( S, &P ) < 0 )
    return -1;

  {
    int rc;
    uint8_t block[BLAKE2B_BLOCKBYTES];
    memset( block, 0, BLAKE2B_BLOCKBYTES );
    memcpy( block, key, keylen );
    rc = blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */

    if( rc < 0 )
      return -1;
  }
  return 0;
}
279
/* Compresses one BLAKE2B_BLOCKBYTES-byte message block into the chaining
   value S->h, using the current counter S->t and finalization flags S->f.

   The 16-word working state is held in eight 128-bit registers (two words
   each): rows 1-2 start as h[0..7], row 3 as IV[0..3], and row 4 as
   IV[4..7] XORed with t[0..1] and f[0..1]. Twelve ROUND() invocations follow;
   ROUND comes from blake2b-round.h and refers to the locals below by name
   (rowNl/rowNh, b0/b1, t0/t1, m0.., r16/r24), so those names must not change.
   Finally h[i] ^= row1/2[i] ^ row3/4[i].

   block may be arbitrarily aligned. Always returns 0. */
static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  __m128i row1l, row1h;
  __m128i row2l, row2h;
  __m128i row3l, row3h;
  __m128i row4l, row4h;
  __m128i b0, b1;
  __m128i t0, t1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-shuffle masks. NOTE(review): presumably consumed by the round macros
     to rotate each 64-bit lane by 16 and 24 bits -- confirm in blake2b-round.h. */
  const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
  const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
#if defined(HAVE_SSE4_1)
  /* Message as eight 128-bit vectors (two words each). */
  const __m128i m0 = LOADU( block + 00 );
  const __m128i m1 = LOADU( block + 16 );
  const __m128i m2 = LOADU( block + 32 );
  const __m128i m3 = LOADU( block + 48 );
  const __m128i m4 = LOADU( block + 64 );
  const __m128i m5 = LOADU( block + 80 );
  const __m128i m6 = LOADU( block + 96 );
  const __m128i m7 = LOADU( block + 112 );
#else
  /* Message as sixteen 64-bit words. The bytes are copied into a properly
     typed array rather than read through a cast uint64_t pointer: the input
     is a byte buffer with no alignment guarantee, and the cast also dropped
     the const qualifier. */
  uint64_t msg_words[16];
  memcpy( msg_words, block, sizeof( msg_words ) );
  const uint64_t m0 = msg_words[ 0];
  const uint64_t m1 = msg_words[ 1];
  const uint64_t m2 = msg_words[ 2];
  const uint64_t m3 = msg_words[ 3];
  const uint64_t m4 = msg_words[ 4];
  const uint64_t m5 = msg_words[ 5];
  const uint64_t m6 = msg_words[ 6];
  const uint64_t m7 = msg_words[ 7];
  const uint64_t m8 = msg_words[ 8];
  const uint64_t m9 = msg_words[ 9];
  const uint64_t m10 = msg_words[10];
  const uint64_t m11 = msg_words[11];
  const uint64_t m12 = msg_words[12];
  const uint64_t m13 = msg_words[13];
  const uint64_t m14 = msg_words[14];
  const uint64_t m15 = msg_words[15];
#endif
  /* Load the working state. */
  row1l = LOADU( &S->h[0] );
  row1h = LOADU( &S->h[2] );
  row2l = LOADU( &S->h[4] );
  row2h = LOADU( &S->h[6] );
  row3l = LOADU( &blake2b_IV[0] );
  row3h = LOADU( &blake2b_IV[2] );
  row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
  row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
  /* Twelve rounds. */
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );
  /* Feed-forward: fold both halves of the working state into h. */
  row1l = _mm_xor_si128( row3l, row1l );
  row1h = _mm_xor_si128( row3h, row1h );
  STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
  STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
  row2l = _mm_xor_si128( row4l, row2l );
  row2h = _mm_xor_si128( row4h, row2h );
  STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
  STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
  return 0;
}
349
350
/* Absorbs inlen bytes from in into the hash state.
   S->buf holds up to two blocks. A block is compressed only when the incoming
   data does not fit into the remaining room, so the most recent bytes always
   stay buffered for blake2b_final. Always returns 0. */
int blake2b_update( blake2b_state *S, const uint8_t *in, size_t inlen )
{
  while( inlen > 0 )
  {
    const uint32_t used = S->buflen;
    const uint32_t room = 2 * BLAKE2B_BLOCKBYTES - used;

    if( inlen <= room )
    {
      /* Everything that is left fits: buffer it and stop, without compressing. */
      memcpy( S->buf + used, in, inlen );
      S->buflen += ( uint32_t ) inlen;
      break;
    }

    /* Top the buffer up to two full blocks and compress the first one. */
    memcpy( S->buf + used, in, room );
    S->buflen += room;
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );

    /* Slide the second block down to the front of the buffer. */
    memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
    S->buflen -= BLAKE2B_BLOCKBYTES;

    in += room;
    inlen -= room;
  }

  return 0;
}
380
381
/* Finishes the hash and writes outlen digest bytes to out.
   outlen must equal the length recorded at initialization, otherwise -1 is
   returned and nothing is written. Returns 0 on success. */
int blake2b_final( blake2b_state *S, uint8_t *out, size_t outlen )
{
  if( outlen != S->outlen )
  {
    return -1;
  }

  if( S->buflen > BLAKE2B_BLOCKBYTES )
  {
    /* More than one block is pending: compress the first one, then move the
       remainder to the front of the buffer. */
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    S->buflen -= BLAKE2B_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
  }

  /* Count the remaining bytes, raise the last-block flag and zero-pad. */
  blake2b_increment_counter( S, S->buflen );
  blake2b_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
  blake2b_compress( S, S->buf );

  /* The digest is the leading outlen bytes of the chaining value. */
  memcpy( out, S->h, outlen );
  return 0;
}
401
402
blake2b(uint8_t * out,const void * in,const void * key,size_t outlen,size_t inlen,size_t keylen)403 int blake2b( uint8_t *out, const void *in, const void *key, size_t outlen, size_t inlen, size_t keylen )
404 {
405 blake2b_state S[1];
406
407 /* Verify parameters */
408 if ( NULL == in && inlen > 0 ) return -1;
409
410 if ( NULL == out ) return -1;
411
412 if( NULL == key && keylen > 0 ) return -1;
413
414 if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
415
416 if( keylen > BLAKE2B_KEYBYTES ) return -1;
417
418 if( keylen )
419 {
420 if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
421 }
422 else
423 {
424 if( blake2b_init( S, outlen ) < 0 ) return -1;
425 }
426
427 if( blake2b_update( S, ( uint8_t * )in, inlen ) < 0) return -1;
428 return blake2b_final( S, out, outlen );
429 }
430
431 #if defined(SUPERCOP)
crypto_hash(unsigned char * out,unsigned char * in,unsigned long long inlen)432 int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
433 {
434 return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
435 }
436 #endif
437