• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the OpenSSL license (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <openssl/opensslconf.h>
11 /*-
12  * IMPLEMENTATION NOTES.
13  *
14  * As you might have noticed, 32-bit hash algorithms:
15  *
16  * - permit SHA_LONG to be wider than 32-bit
17  * - optimized versions implement two transform functions: one operating
18  *   on [aligned] data in host byte order and one - on data in input
19  *   stream byte order;
20  * - share common byte-order neutral collector and padding function
21  *   implementations, ../md32_common.h;
22  *
23  * None of the above applies to this SHA-512 implementation. Reasons
24  * [in reverse order] are:
25  *
26  * - it's the only 64-bit hash algorithm at the time of this writing, so
27  *   there is no need for a common collector/padding implementation [yet];
28  * - by supporting only one transform function [which operates on
29  *   *aligned* data in input stream byte order, big-endian in this case]
30  *   we minimize burden of maintenance in two ways: a) collector/padding
31  *   function is simpler; b) only one transform function to stare at;
32  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
33  *   apply a number of optimizations to mitigate potential performance
34  *   penalties caused by the previous design decision;
35  *
36  * Caveat lector.
37  *
38  * Implementation relies on the fact that "long long" is 64-bit on
39  * both 32- and 64-bit platforms. If some compiler vendor comes up
40  * with 128-bit long long, adjustment to sha.h would be required.
41  * As this implementation relies on 64-bit integer type, it's totally
42  * inappropriate for platforms which don't support it, most notably
43  * 16-bit platforms.
44  */
45 #include <stdlib.h>
46 #include <string.h>
47 
48 #include <openssl/crypto.h>
49 #include <openssl/sha.h>
50 #include <openssl/opensslv.h>
51 
52 #include "internal/cryptlib.h"
53 #include "crypto/sha.h"
54 
/*
 * Defined on targets (and whenever an assembler implementation is in use)
 * where sha512_block_data_order() may be given input that is not aligned
 * to SHA_LONG64.  When it is not defined, SHA512_Update() and
 * SHA512_Transform() copy unaligned input into the context buffer first.
 */
#if defined(SHA512_ASM) || \
    defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif
62 
/*
 * Prepare |c| for a SHA-512/224 computation: load the eight initial hash
 * words, clear the Nl/Nh length counters and the buffered-byte count, and
 * select a SHA224_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int sha512_224_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x8c3d37c819544da2), U64(0x73e1996689dcd4d6),
        U64(0x1dfab7ae32ff9c82), U64(0x679dd514582f9fcf),
        U64(0x0f6d2b697bd44da8), U64(0x77e36f7304c48942),
        U64(0x3f9d85a86a1d36c8), U64(0x1112e6ad91d692a1)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->num = 0;
    c->Nl = 0;
    c->Nh = 0;
    c->md_len = SHA224_DIGEST_LENGTH;
    return 1;
}
80 
/*
 * Prepare |c| for a SHA-512/256 computation: load the eight initial hash
 * words, clear the Nl/Nh length counters and the buffered-byte count, and
 * select a SHA256_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int sha512_256_init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x22312194fc2bf72c), U64(0x9f555fa3c84c64c2),
        U64(0x2393b86b6f53b151), U64(0x963877195940eabd),
        U64(0x96283ee2a88effe3), U64(0xbe5e1e2553863992),
        U64(0x2b0199fc2c85b8aa), U64(0x0eb72ddc81c52ca2)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->num = 0;
    c->Nl = 0;
    c->Nh = 0;
    c->md_len = SHA256_DIGEST_LENGTH;
    return 1;
}
98 
/*
 * Prepare |c| for a SHA-384 computation: load the eight initial hash
 * words, clear the Nl/Nh length counters and the buffered-byte count, and
 * select a SHA384_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int SHA384_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0xcbbb9d5dc1059ed8), U64(0x629a292a367cd507),
        U64(0x9159015a3070dd17), U64(0x152fecd8f70e5939),
        U64(0x67332667ffc00b31), U64(0x8eb44a8768581511),
        U64(0xdb0c2e0d64f98fa7), U64(0x47b5481dbefa4fa4)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->num = 0;
    c->Nl = 0;
    c->Nh = 0;
    c->md_len = SHA384_DIGEST_LENGTH;
    return 1;
}
116 
/*
 * Prepare |c| for a SHA-512 computation: load the eight initial hash
 * words, clear the Nl/Nh length counters and the buffered-byte count, and
 * select a SHA512_DIGEST_LENGTH-byte output.  Always returns 1.
 */
int SHA512_Init(SHA512_CTX *c)
{
    static const SHA_LONG64 iv[8] = {
        U64(0x6a09e667f3bcc908), U64(0xbb67ae8584caa73b),
        U64(0x3c6ef372fe94f82b), U64(0xa54ff53a5f1d36f1),
        U64(0x510e527fade682d1), U64(0x9b05688c2b3e6c1f),
        U64(0x1f83d9abfb41bd6b), U64(0x5be0cd19137e2179)
    };
    size_t i;

    for (i = 0; i < sizeof(iv) / sizeof(iv[0]); i++)
        c->h[i] = iv[i];

    c->num = 0;
    c->Nl = 0;
    c->Nh = 0;
    c->md_len = SHA512_DIGEST_LENGTH;
    return 1;
}
134 
135 #ifndef SHA512_ASM
136 static
137 #endif
138 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
139 
/*
 * Finish the hash held in |c| and write the digest to |md|.
 *
 * Appends the 0x80 marker and zero padding to the buffered data, stores
 * the Nh:Nl length counters big-endian in the last 16 bytes of the block,
 * and runs the compression function (twice when the length field does not
 * fit in the current block).  Then c->md_len bytes of the hash state are
 * written to |md|, most significant byte of each word first.
 *
 * Returns 1 on success.  Returns 0 if |md| is NULL or c->md_len is not one
 * of the four supported digest lengths; in both cases the padding block has
 * already been processed by the time the check is made.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;
    size_t i;

    p[n++] = 0x80;              /* There always is a room for one */
    if (n > (sizeof(c->u) - 16)) {
        /* No room left for the 16-byte length: flush this block first. */
        memset(p + n, 0, sizeof(c->u) - n);
        n = 0;
        sha512_block_data_order(c, p, 1);
    }

    memset(p + n, 0, sizeof(c->u) - 16 - n);
#ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    /* Byte-wise big-endian store: Nl ends the block, Nh precedes it. */
    for (i = 0; i < 8; i++) {
        p[sizeof(c->u) - 1 - i] = (unsigned char)(c->Nl >> (8 * i));
        p[sizeof(c->u) - 9 - i] = (unsigned char)(c->Nh >> (8 * i));
    }
#endif

    sha512_block_data_order(c, p, 1);

    if (md == NULL)
        return 0;

    /* Make sure md_len is not abused before using it as a byte count. */
    switch (c->md_len) {
    case SHA224_DIGEST_LENGTH:
    case SHA256_DIGEST_LENGTH:
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        break;
    default:
        return 0;
    }

    /*
     * Byte i of the digest is byte (i % 8) of word h[i / 8], counted from
     * the most significant end.  This also covers the 224-bit case, whose
     * last word contributes only its top four bytes.
     */
    for (i = 0; i < c->md_len; i++)
        md[i] = (unsigned char)(c->h[i / 8] >> (56 - 8 * (i % 8)));

    return 1;
}
258 
/*
 * SHA-384 finalisation.  Forwards to SHA512_Final(), which picks the
 * output length from c->md_len, and returns its result unchanged.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    int ok = SHA512_Final(md, c);

    return ok;
}
263 
/*
 * Absorb |len| bytes from |_data| into the hash state |c|.
 *
 * The bit count is accumulated in c->Nh:c->Nl.  Input first tops up any
 * partially filled block buffered in c->u; whole blocks are then handed to
 * sha512_block_data_order() directly from the caller's memory (or via the
 * context buffer when the input is unaligned and the block function cannot
 * cope with that); whatever is left over is buffered for the next call.
 *
 * Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    SHA_LONG64 l;
    unsigned char *p = c->u.p;
    const unsigned char *data = (const unsigned char *)_data;

    if (len == 0)
        return 1;

    /* Add len * 8 bits to the Nh:Nl counter, carrying into Nh. */
    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;
    /* The top three bits of len are shifted out of Nl by the << 3 above. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = l;

    if (c->num != 0) {
        size_t n = sizeof(c->u) - c->num;   /* free space in the buffer */

        if (len < n) {
            memcpy(p + c->num, data, len);
            c->num += (unsigned int)len;
            return 1;
        }

        memcpy(p + c->num, data, n);
        c->num = 0;
        len -= n;
        data += n;
        sha512_block_data_order(c, p, 1);
    }

    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)data % sizeof(c->u.d[0]) != 0) {
            /* Unaligned input: bounce each block through the buffer. */
            while (len >= sizeof(c->u)) {
                memcpy(p, data, sizeof(c->u));
                sha512_block_data_order(c, p, 1);
                len -= sizeof(c->u);
                data += sizeof(c->u);
            }
        } else
#endif
        {
            size_t blocks = len / sizeof(c->u);

            sha512_block_data_order(c, data, blocks);
            data += blocks * sizeof(c->u);
            len %= sizeof(c->u);
        }
    }

    if (len != 0) {
        memcpy(p, data, len);
        c->num = (unsigned int)len;
    }

    return 1;
}
311 
/*
 * SHA-384 data absorption.  Forwards to SHA512_Update() and returns its
 * result unchanged.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    int ok = SHA512_Update(c, data, len);

    return ok;
}
316 
/*
 * Run the compression function over exactly one block at |data|.
 *
 * When the block function cannot handle unaligned input, a misaligned
 * block is first copied into the context buffer c->u.p and processed
 * from there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        data = c->u.p;
    }
#endif
    sha512_block_data_order(c, data, 1);
}
325 
SHA384(const unsigned char * d,size_t n,unsigned char * md)326 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
327 {
328     SHA512_CTX c;
329     static unsigned char m[SHA384_DIGEST_LENGTH];
330 
331     if (md == NULL)
332         md = m;
333     SHA384_Init(&c);
334     SHA512_Update(&c, d, n);
335     SHA512_Final(md, &c);
336     OPENSSL_cleanse(&c, sizeof(c));
337     return md;
338 }
339 
SHA512(const unsigned char * d,size_t n,unsigned char * md)340 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
341 {
342     SHA512_CTX c;
343     static unsigned char m[SHA512_DIGEST_LENGTH];
344 
345     if (md == NULL)
346         md = m;
347     SHA512_Init(&c);
348     SHA512_Update(&c, d, n);
349     SHA512_Final(md, &c);
350     OPENSSL_cleanse(&c, sizeof(c));
351     return md;
352 }
353 
354 #ifndef SHA512_ASM
355 static const SHA_LONG64 K512[80] = {
356     U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
357     U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
358     U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
359     U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
360     U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
361     U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
362     U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
363     U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
364     U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
365     U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
366     U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
367     U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
368     U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
369     U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
370     U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
371     U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
372     U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
373     U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
374     U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
375     U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
376     U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
377     U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
378     U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
379     U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
380     U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
381     U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
382     U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
383     U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
384     U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
385     U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
386     U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
387     U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
388     U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
389     U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
390     U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
391     U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
392     U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
393     U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
394     U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
395     U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
396 };
397 
# ifndef PEDANTIC
/*
 * Compiler- and CPU-specific definitions of ROTR(a,n) (rotate a 64-bit
 * value right by n bits) and PULL64(x) (read the 64-bit word stored at x
 * most-significant-byte first).  Any of the two left undefined here gets a
 * portable definition after this section.
 */
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
/* x86_64: rotate with rorq, byte-reverse with bswapq. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;                             \
                            asm ("rorq %1,%0"                           \
                                 : "=r"(ret)                            \
                                 : "J"(n),"0"(a)                        \
                                 : "cc");                               \
                            ret; })
#    if !defined(B_ENDIAN)
#     define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
                          asm ("bswapq    %0"                           \
                               : "=r"(ret)                              \
                               : "0"(ret));                             \
                          ret; })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
/*
 * 32-bit x86: load the word as two 32-bit halves, byte-reverse each half
 * and recombine.  I386_ONLY builds avoid bswap and use xchgb/roll instead.
 */
#    if defined(I386_ONLY)
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x)); \
                          unsigned int hi=p[0],lo=p[1];                       \
                          asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"              \
                              "roll $16,%%eax; roll $16,%%edx; "              \
                              "xchgb %%ah,%%al;xchgb %%dh,%%dl;"              \
                              : "=a"(lo),"=d"(hi)                             \
                              : "0"(lo),"1"(hi) : "cc");                      \
                          ((SHA_LONG64)hi)<<32|lo; })
#    else
#     define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x)); \
                          unsigned int hi=p[0],lo=p[1];                       \
                          asm ("bswapl %0; bswapl %1;"                        \
                               : "=r"(lo),"=r"(hi)                            \
                               : "0"(lo),"1"(hi));                            \
                          ((SHA_LONG64)hi)<<32|lo; })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate with rotrdi. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;                             \
                            asm ("rotrdi %0,%1,%2"                      \
                                 : "=r"(ret)                            \
                                 : "r"(a),"K"(n));                      \
                            ret; })
#   elif defined(__aarch64__)
/* AArch64: rotate with ror; on little-endian, byte-reverse with rev. */
#    define ROTR(a,n)    ({ SHA_LONG64 ret;                             \
                            asm ("ror %0,%1,%2"                         \
                                 : "=r"(ret)                            \
                                 : "r"(a),"I"(n));                      \
                            ret; })
#    if  defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)   ({ SHA_LONG64 ret;                             \
                            asm ("rev       %0,%1"                      \
                                 : "=r"(ret)                            \
                                 : "r"(*((const SHA_LONG64 *)(&(x))))); \
                            ret; })
#    endif
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)    _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
/*
 * 32-bit MSVC: __pull64be() reads the two 32-bit halves of the word at x
 * through ecx, byte-reverses each, and leaves them in edx (first half)
 * and eax (second half).  There is no C return statement; the value is
 * whatever the asm leaves in those registers.
 */
#    if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov  edx,[ecx + 0]
    _asm mov  eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol  edx, 16
    _asm rol  eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov   edx,[ecx + 0]
    _asm mov   eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
/*
 * Portable fallbacks, used when no compiler-specific version was defined.
 *
 * B(x,j) reads byte j of the object x and shifts it into its big-endian
 * position; PULL64(x) assembles the eight bytes into one 64-bit word.
 * ROTR(x,s) rotates a 64-bit value right by s bits (0 < s < 64).
 * All macro parameters are parenthesised so that expression arguments
 * such as ROTR(v, a + b) expand correctly.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* The SHA-512 round functions and message-schedule functions. */
# define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
494 
495 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
496 /*
497  * This code should give better results on 32-bit CPU with less than
498  * ~24 registers, both size and performance wise...
499  */
500 
/*
 * SHA-512 compression function, variant for 32-bit x86.
 *
 * Processes |num| blocks of SHA_LBLOCK 64-bit words starting at |in| and
 * adds the result of each into ctx->h.
 *
 * Only the working variables a (A) and e (E) live in locals.  The other
 * six, and the message schedule, live in X[] and are addressed through
 * the sliding pointer F: F[1..3] are b,c,d, F[5..7] are f,g,h and F[8] is
 * the current round's message word.  Moving F down one slot per round
 * shifts every variable to its next role without copying, and leaves the
 * message word of k rounds ago at F[8 + k].
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 X[9 + 80];
    SHA_LONG64 *F;
    SHA_LONG64 A, E, T;
    int round;

    for (; num != 0; num--, W += SHA_LBLOCK) {
        F = &X[80];
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        /* Rounds 0..15 take the message word straight from the input. */
        round = 0;
        while (round < 16) {
#  ifdef B_ENDIAN
            T = W[round];
#  else
            T = PULL64(W[round]);
#  endif
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[round];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
            round++;
            F--;
        }

        /* Rounds 16..79 derive the message word from earlier ones. */
        while (round < 80) {
            T = sigma0(F[8 + 15]);          /* word from 15 rounds ago */
            T += sigma1(F[8 + 2]);          /* word from 2 rounds ago */
            T += F[8 + 16] + F[8 + 7];      /* words from 16 and 7 ago */

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[round];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
            round++;
            F--;
        }

        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];
    }
}
560 
561 # elif defined(OPENSSL_SMALL_FOOTPRINT)
562 
/*
 * SHA-512 compression function, compact (OPENSSL_SMALL_FOOTPRINT) variant.
 *
 * Processes |num| blocks of SHA_LBLOCK 64-bit words starting at |in| and
 * adds the result of each into ctx->h.  The message schedule is kept in a
 * 16-entry ring X[], indexed modulo 16, and the eight working variables
 * are rotated explicitly at the end of every round.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    for (; num != 0; num--, W += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: message words come straight from the input. */
        for (i = 0; i < 16; i++) {
#  ifdef B_ENDIAN
            X[i] = W[i];
#  else
            X[i] = PULL64(W[i]);
#  endif
            T1 = X[i];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        /* Rounds 16..79: extend the schedule in place inside the ring. */
        for (i = 16; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];         /* word from 15 rounds ago */
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];        /* word from 2 rounds ago */
            s1 = sigma1(s1);

            X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 = X[i & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
631 
632 # else
633 #  define ROUND_00_15(i,a,b,c,d,e,f,g,h)        do {    \
634         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
635         h = Sigma0(a) + Maj(a,b,c);                     \
636         d += T1;        h += T1;                        } while (0)
637 
638 #  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)    do {    \
639         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
640         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
641         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
642         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
643 
sha512_block_data_order(SHA512_CTX * ctx,const void * in,size_t num)644 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
645                                     size_t num)
646 {
647     const SHA_LONG64 *W = in;
648     SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
649     SHA_LONG64 X[16];
650     int i;
651 
652     while (num--) {
653 
654         a = ctx->h[0];
655         b = ctx->h[1];
656         c = ctx->h[2];
657         d = ctx->h[3];
658         e = ctx->h[4];
659         f = ctx->h[5];
660         g = ctx->h[6];
661         h = ctx->h[7];
662 
663 #  ifdef B_ENDIAN
664         T1 = X[0] = W[0];
665         ROUND_00_15(0, a, b, c, d, e, f, g, h);
666         T1 = X[1] = W[1];
667         ROUND_00_15(1, h, a, b, c, d, e, f, g);
668         T1 = X[2] = W[2];
669         ROUND_00_15(2, g, h, a, b, c, d, e, f);
670         T1 = X[3] = W[3];
671         ROUND_00_15(3, f, g, h, a, b, c, d, e);
672         T1 = X[4] = W[4];
673         ROUND_00_15(4, e, f, g, h, a, b, c, d);
674         T1 = X[5] = W[5];
675         ROUND_00_15(5, d, e, f, g, h, a, b, c);
676         T1 = X[6] = W[6];
677         ROUND_00_15(6, c, d, e, f, g, h, a, b);
678         T1 = X[7] = W[7];
679         ROUND_00_15(7, b, c, d, e, f, g, h, a);
680         T1 = X[8] = W[8];
681         ROUND_00_15(8, a, b, c, d, e, f, g, h);
682         T1 = X[9] = W[9];
683         ROUND_00_15(9, h, a, b, c, d, e, f, g);
684         T1 = X[10] = W[10];
685         ROUND_00_15(10, g, h, a, b, c, d, e, f);
686         T1 = X[11] = W[11];
687         ROUND_00_15(11, f, g, h, a, b, c, d, e);
688         T1 = X[12] = W[12];
689         ROUND_00_15(12, e, f, g, h, a, b, c, d);
690         T1 = X[13] = W[13];
691         ROUND_00_15(13, d, e, f, g, h, a, b, c);
692         T1 = X[14] = W[14];
693         ROUND_00_15(14, c, d, e, f, g, h, a, b);
694         T1 = X[15] = W[15];
695         ROUND_00_15(15, b, c, d, e, f, g, h, a);
696 #  else
697         T1 = X[0] = PULL64(W[0]);
698         ROUND_00_15(0, a, b, c, d, e, f, g, h);
699         T1 = X[1] = PULL64(W[1]);
700         ROUND_00_15(1, h, a, b, c, d, e, f, g);
701         T1 = X[2] = PULL64(W[2]);
702         ROUND_00_15(2, g, h, a, b, c, d, e, f);
703         T1 = X[3] = PULL64(W[3]);
704         ROUND_00_15(3, f, g, h, a, b, c, d, e);
705         T1 = X[4] = PULL64(W[4]);
706         ROUND_00_15(4, e, f, g, h, a, b, c, d);
707         T1 = X[5] = PULL64(W[5]);
708         ROUND_00_15(5, d, e, f, g, h, a, b, c);
709         T1 = X[6] = PULL64(W[6]);
710         ROUND_00_15(6, c, d, e, f, g, h, a, b);
711         T1 = X[7] = PULL64(W[7]);
712         ROUND_00_15(7, b, c, d, e, f, g, h, a);
713         T1 = X[8] = PULL64(W[8]);
714         ROUND_00_15(8, a, b, c, d, e, f, g, h);
715         T1 = X[9] = PULL64(W[9]);
716         ROUND_00_15(9, h, a, b, c, d, e, f, g);
717         T1 = X[10] = PULL64(W[10]);
718         ROUND_00_15(10, g, h, a, b, c, d, e, f);
719         T1 = X[11] = PULL64(W[11]);
720         ROUND_00_15(11, f, g, h, a, b, c, d, e);
721         T1 = X[12] = PULL64(W[12]);
722         ROUND_00_15(12, e, f, g, h, a, b, c, d);
723         T1 = X[13] = PULL64(W[13]);
724         ROUND_00_15(13, d, e, f, g, h, a, b, c);
725         T1 = X[14] = PULL64(W[14]);
726         ROUND_00_15(14, c, d, e, f, g, h, a, b);
727         T1 = X[15] = PULL64(W[15]);
728         ROUND_00_15(15, b, c, d, e, f, g, h, a);
729 #  endif
730 
731         for (i = 16; i < 80; i += 16) {
732             ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
733             ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
734             ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
735             ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
736             ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
737             ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
738             ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
739             ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
740             ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
741             ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
742             ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
743             ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
744             ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
745             ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
746             ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
747             ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
748         }
749 
750         ctx->h[0] += a;
751         ctx->h[1] += b;
752         ctx->h[2] += c;
753         ctx->h[3] += d;
754         ctx->h[4] += e;
755         ctx->h[5] += f;
756         ctx->h[6] += g;
757         ctx->h[7] += h;
758 
759         W += SHA_LBLOCK;
760     }
761 }
762 
763 # endif
764 
765 #endif                         /* SHA512_ASM */
766