1 /* Sha256.c -- SHA-256 Hash
2 2024-03-01 : Igor Pavlov : Public domain
3 This code is based on public domain code from Wei Dai's Crypto++ library. */
4
5 #include "Precomp.h"
6
7 #include <string.h>
8
9 #include "CpuArch.h"
10 #include "RotateDefs.h"
11 #include "Sha256.h"
12
#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif

/* Decide at compile time whether this toolchain can build the hardware
   (SHA intrinsics) code path; if so, Z7_COMPILER_SHA256_SUPPORTED is set
   and Sha256_UpdateBlocks_HW is expected from another translation unit. */
#ifdef MY_CPU_X86_OR_AMD64
  /* x86/x64: SHA-NI intrinsics need a sufficiently new compiler. */
  #if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
      || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
      || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
      || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
      || defined(_MSC_VER) && (_MSC_VER >= 1200)
    #define Z7_COMPILER_SHA256_SUPPORTED
  #endif
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)

  /* ARM/ARM64 (little-endian): preferred signal is the compiler's own
     crypto-extension feature macros. */
  #if defined(__ARM_FEATURE_SHA2) \
      || defined(__ARM_FEATURE_CRYPTO)
    #define Z7_COMPILER_SHA256_SUPPORTED
  #else
    /* Otherwise, allow the HW path when the compiler can be asked to emit
       crypto instructions via target attributes/pragmas. */
    #if defined(MY_CPU_ARM64) \
        || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
        || defined(Z7_MSC_VER_ORIGINAL)
      #if defined(__ARM_FP) && \
            ( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
              || defined(__GNUC__) && (__GNUC__ >= 6) \
            ) \
          || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
        /* NOTE(review): the clang 17.0.x exclusion below presumably works
           around a compiler defect in that exact release — confirm upstream. */
        #if defined(MY_CPU_ARM64) \
            || !defined(Z7_CLANG_VERSION) \
            || defined(__ARM_NEON) && \
               (Z7_CLANG_VERSION < 170000 || \
                Z7_CLANG_VERSION > 170001)
          #define Z7_COMPILER_SHA256_SUPPORTED
        #endif
      #endif
    #endif
  #endif
#endif
50
/* Portable software implementation (defined below in this file). */
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);

#ifdef Z7_COMPILER_SHA256_SUPPORTED
/* Hardware (SHA instruction) implementation, built in a separate TU. */
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);

/* Global default selected by Sha256Prepare(); starts as the SW routine. */
static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
/* NULL until Sha256Prepare() detects hardware support at runtime. */
static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW;

/* Per-context dispatch when a HW path can exist; otherwise call SW directly. */
#define SHA256_UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
#define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks
#endif
63
64
Sha256_SetFunction(CSha256 * p,unsigned algo)65 BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
66 {
67 SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
68
69 #ifdef Z7_COMPILER_SHA256_SUPPORTED
70 if (algo != SHA256_ALGO_SW)
71 {
72 if (algo == SHA256_ALGO_DEFAULT)
73 func = g_SHA256_FUNC_UPDATE_BLOCKS;
74 else
75 {
76 if (algo != SHA256_ALGO_HW)
77 return False;
78 func = g_SHA256_FUNC_UPDATE_BLOCKS_HW;
79 if (!func)
80 return False;
81 }
82 }
83 #else
84 if (algo > 1)
85 return False;
86 #endif
87
88 p->func_UpdateBlocks = func;
89 return True;
90 }
91
92
/* define it for speed optimization */

/* Round-batching factors for the software loop: smaller values shrink code
   (SFX stub), larger values trade size for speed. */
#ifdef Z7_SFX
  #define STEP_PRE 1
  #define STEP_MAIN 1
#else
  #define STEP_PRE 2
  #define STEP_MAIN 4
  // #define Z7_SHA256_UNROLL
#endif

/* Use a full 64-word schedule array unless STEP_MAIN == 16,
   where a 16-word ring buffer suffices. */
#undef Z7_SHA256_BIG_W
#if STEP_MAIN != 16
  #define Z7_SHA256_BIG_W
#endif
108
109
110
111
/* Resets the chaining value to the SHA-256 initial hash (FIPS 180-4,
   Section 5.3.3) and clears the processed-byte counter. */
void Sha256_InitState(CSha256 *p)
{
  static const UInt32 k_sha256_iv[8] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
  };
  p->count = 0;
  memcpy(p->state, k_sha256_iv, sizeof(k_sha256_iv));
}
124
/* Full context init: binds the globally selected implementation
   (set up by Sha256Prepare) and resets the hash state. */
void Sha256_Init(CSha256 *p)
{
#ifdef Z7_COMPILER_SHA256_SUPPORTED
  p->func_UpdateBlocks = g_SHA256_FUNC_UPDATE_BLOCKS;
#else
  /* No runtime dispatch in this build; the field is unused. */
  p->func_UpdateBlocks = NULL;
#endif
  Sha256_InitState(p);
}
135
/* FIPS 180-4 Section 4.1.2 building blocks:
   S0/S1: "big sigma" rotations used in the compression rounds,
   s0/s1: "small sigma" rotations used in the message schedule. */
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))

/* Choice and majority functions (algebraically reduced forms). */
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))


/* Rounds 0-15: load schedule word i from the big-endian input block. */
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))

/* Schedule expansion term: s1(W[t-2]) + W[t-7] + s0(W[t-15]). */
#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))

#ifdef Z7_SHA256_BIG_W
  // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
  #define w(j, i) W[(size_t)(j) + i]
  #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
  /* 16-word ring buffer: index modulo 16, so W[t] overwrites W[t-16]. */
  #if STEP_MAIN == 16
    #define w(j, i) W[(i) & 15]
  #else
    #define w(j, i) W[((size_t)(j) + (i)) & 15]
  #endif
  #define blk2(j, i) (w(j, i) += blk2_main(j, i))
#endif

/* Rounds 16-63: expand the schedule in place. */
#define W_MAIN(i) blk2(j, i)
163
164
/* One SHA-256 round with explicit register rotation (a..h shift down one).
   wx is W_PRE or W_MAIN, selecting load vs. expansion of the schedule word. */
#define T1(wx, i) \
    tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    h = g; \
    g = f; \
    f = e; \
    e = d + tmp; \
    tmp += S0(a) + Maj(a, b, c); \
    d = c; \
    c = b; \
    b = a; \
    a = tmp; \

#define R1_PRE(i)  T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)

#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
/* Two rounds per expansion when batching by 4 without full unroll. */
#define R2_MAIN(i) \
    R1_MAIN(i) \
    R1_MAIN(i + 1) \

#endif



#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

/* Unrolled variants: instead of rotating registers each round, the
   variable names are permuted in the caller (R4/R8), so each T4/T8 step
   only updates the two registers that actually change. */
#define T4( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    tmp = h; \
    h += d; \
    d = tmp + S0(a) + Maj(a, b, c); \

#define R4( wx, i) \
    T4 ( a,b,c,d,e,f,g,h, wx, (i  )); \
    T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
    T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
    T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \

#define R4_PRE(i)  R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)


#define T8( a,b,c,d,e,f,g,h, wx, i) \
    h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
    d += h; \
    h += S0(a) + Maj(a, b, c); \

#define R8( wx, i) \
    T8 ( a,b,c,d,e,f,g,h, wx, i  ); \
    T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
    T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
    T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
    T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
    T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
    T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
    T8 ( b,c,d,e,f,g,h,a, wx, i+7); \

#define R8_PRE(i)  R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)

#endif
226
// static
extern MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];

/* SHA-256 round constants K[0..63] (FIPS 180-4 Section 4.2.2);
   64-byte aligned so the HW code path can load them efficiently. */
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64] = {
  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

#define K SHA256_K_ARRAY
252
253
/* Software SHA-256 compression: processes numBlocks 64-byte blocks from
   data, updating the 8-word chaining value in state.  The round structure
   is selected at compile time via STEP_PRE / STEP_MAIN / Z7_SHA256_UNROLL. */
Z7_NO_INLINE
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  /* Message schedule: full 64-word array, or a 16-word ring buffer
     (see Z7_SHA256_BIG_W selection above). */
  UInt32 W
#ifdef Z7_SHA256_BIG_W
      [64];
#else
      [16];
#endif

  unsigned j;

  /* Working variables a..h (FIPS 180-4 Section 6.2.2). */
  UInt32 a,b,c,d,e,f,g,h;

  /* tmp is only needed by the register-rotating round macros (T1/T4). */
#if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
  UInt32 tmp;
#endif

  a = state[0];
  b = state[1];
  c = state[2];
  d = state[3];
  e = state[4];
  f = state[5];
  g = state[6];
  h = state[7];

  while (numBlocks)
  {

  /* Rounds 0-15: load schedule words big-endian while compressing. */
  for (j = 0; j < 16; j += STEP_PRE)
  {
#if STEP_PRE > 4

#if STEP_PRE < 8
    R4_PRE(0);
#else
    R8_PRE(0);
#if STEP_PRE == 16
    R8_PRE(8);
#endif
#endif

#else

    R1_PRE(0)
#if STEP_PRE >= 2
    R1_PRE(1)
#if STEP_PRE >= 4
    R1_PRE(2)
    R1_PRE(3)
#endif
#endif

#endif
  }

  /* Rounds 16-63: schedule expansion fused with compression. */
  for (j = 16; j < 64; j += STEP_MAIN)
  {
#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8

#if STEP_MAIN < 8
    R4_MAIN(0)
#else
    R8_MAIN(0)
#if STEP_MAIN == 16
    R8_MAIN(8)
#endif
#endif

#else

    R1_MAIN(0)
#if STEP_MAIN >= 2
    R1_MAIN(1)
#if STEP_MAIN >= 4
    R2_MAIN(2)
#if STEP_MAIN >= 8
    R2_MAIN(4)
    R2_MAIN(6)
#if STEP_MAIN >= 16
    R2_MAIN(8)
    R2_MAIN(10)
    R2_MAIN(12)
    R2_MAIN(14)
#endif
#endif
#endif
#endif
#endif
  }

  /* Davies-Meyer feed-forward: fold this block's result into the chain. */
  a += state[0]; state[0] = a;
  b += state[1]; state[1] = b;
  c += state[2]; state[2] = c;
  d += state[3]; state[3] = d;
  e += state[4]; state[4] = e;
  f += state[5]; state[5] = f;
  g += state[6]; state[6] = g;
  h += state[7]; state[7] = h;

  data += 64;
  numBlocks--;
  }

  /* Wipe variables */
  /* memset(W, 0, sizeof(W)); */
}
362
/* Round macros are no longer needed past this point. */
#undef S0
#undef S1
#undef s0
#undef s1
#undef K

/* Process the single 64-byte block currently buffered in the context. */
#define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
370
/* Streams size bytes into the hash.  Partial blocks accumulate in
   p->buffer; whole 64-byte blocks go straight through the selected
   block-update routine without an extra copy. */
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
  if (size == 0)
    return;

  {
    const unsigned bufPos = (unsigned)p->count & 0x3F;  /* bytes already buffered */
    const unsigned room = 64 - bufPos;                  /* space left in buffer */

    p->count += size;

    /* Input fits entirely in the partial-block buffer: stash and return. */
    if (size < room)
    {
      memcpy(p->buffer + bufPos, data, size);
      return;
    }

    /* Complete a previously started block before bulk processing. */
    if (bufPos != 0)
    {
      memcpy(p->buffer + bufPos, data, room);
      data += room;
      size -= room;
      Sha256_UpdateBlock(p);
    }
  }

  {
    /* Bulk-process whole blocks directly from the caller's buffer. */
    const size_t numBlocks = size >> 6;
    SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks);

    /* Stash the tail (< 64 bytes) for the next Update/Final call. */
    size &= 0x3F;
    if (size != 0)
      memcpy(p->buffer, data + (numBlocks << 6), size);
  }
}
407
408
/* Applies SHA-256 padding (0x80, zeros, 64-bit big-endian bit length),
   processes the final block(s), writes the 32-byte big-endian digest,
   and re-initializes the context for reuse. */
void Sha256_Final(CSha256 *p, Byte *digest)
{
  unsigned pos = (unsigned)p->count & 0x3F;
  unsigned i;

  /* Mandatory 0x80 terminator byte. */
  p->buffer[pos++] = 0x80;

  /* No room left for the 8-byte length field: zero-fill, flush, start
     a fresh all-padding block. */
  if (pos > 64 - 8)
  {
    memset(p->buffer + pos, 0, 64 - pos);
    Sha256_UpdateBlock(p);
    pos = 0;
  }

  /* Zero padding up to the length field. */
  memset(p->buffer + pos, 0, (64 - 8) - pos);

  {
    /* Total message length in bits, stored big-endian in the last 8 bytes. */
    const UInt64 numBits = p->count << 3;
    SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32))
    SetBe32(p->buffer + 64 - 4, (UInt32)(numBits))
  }

  Sha256_UpdateBlock(p);

  /* Emit the chaining value as the big-endian digest, two words per step. */
  for (i = 0; i < 8; i += 2)
  {
    const UInt32 w0 = p->state[i];
    const UInt32 w1 = p->state[(size_t)i + 1];
    SetBe32(digest    , w0)
    SetBe32(digest + 4, w1)
    digest += 8;
  }

  /* Leave the context ready for a new message. */
  Sha256_InitState(p);
}
460
461
/* One-time global initialization: probes CPU features at runtime and
   publishes the fastest available block-update routine in the globals
   read by Sha256_Init / Sha256_SetFunction.  No-op in builds whose
   compiler cannot emit the HW path. */
void Sha256Prepare(void)
{
#ifdef Z7_COMPILER_SHA256_SUPPORTED
  SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
  f = Sha256_UpdateBlocks;
  f_hw = NULL;
#ifdef MY_CPU_X86_OR_AMD64
  #ifndef USE_MY_MM
  /* x86/x64 HW routine needs both SHA extensions and SSSE3. */
  if (CPU_IsSupported_SHA()
      && CPU_IsSupported_SSSE3()
      // && CPU_IsSupported_SSE41()
      )
  #endif
#else
  /* ARM/ARM64: check the SHA2 crypto extension. */
  if (CPU_IsSupported_SHA2())
#endif
  {
    // printf("\n========== HW SHA256 ======== \n");
    f = f_hw = Sha256_UpdateBlocks_HW;
  }
  g_SHA256_FUNC_UPDATE_BLOCKS = f;
  g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw;
#endif
}
486
/* Undefine all file-local helper macros so this file can be safely
   concatenated with others (e.g. in unity builds). */
#undef S0
#undef S1
#undef s0
#undef s1
#undef Ch
#undef Maj
#undef W_MAIN
#undef W_PRE
#undef w
#undef blk2_main
#undef blk2
#undef T1
#undef T4
#undef T8
#undef R1_PRE
#undef R1_MAIN
#undef R2_MAIN
#undef R4
#undef R4_PRE
#undef R4_MAIN
#undef R8
#undef R8_PRE
#undef R8_MAIN
#undef STEP_PRE
#undef STEP_MAIN
#undef Z7_SHA256_BIG_W
#undef Z7_SHA256_UNROLL
#undef Z7_COMPILER_SHA256_SUPPORTED
515