/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#ifdef HITLS_CRYPTO_SHA256

#include "crypt_arm.h"

    .arch armv8-a+crypto

/*
 * SHA-256 round constants K[0..63] (see RFC 4634 / FIPS 180-4).
 * Both code paths in this file read the table sequentially from the start
 * for every 64-byte block.
 */
.extern g_cryptArmCpuInfo
.hidden g_cryptArmCpuInfo
.section .rodata
.balign 64
.K256:
    .long   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
    .long   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
    .long   0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
    .long   0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
    .long   0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
    .long   0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
    .long   0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
    .long   0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

/*
 * Macro description: computes the next message-schedule word W[i] in place.
 * Input register:
 *      wi_16: W[i-16]
 *      wi_15: W[i-15]
 *      wi_7:  W[i-7]
 *      wi_2:  W[i-2]
 * Modify the register: wi_16 w17 w28
 * Output register:
 *      wi_16: Latest W[i] value, W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
 * Function/Macro Call: None
 */
    .macro UPDATE_W wi_16, wi_15, wi_7, wi_2
    ror     w28, \wi_15, #7
    ror     w17, \wi_2, #17
    eor     w28, w28, \wi_15, ror #18
    eor     w17, w17, \wi_2, ror #19
    eor     w28, w28, \wi_15, lsr #3    // w28 = sigma0(W[i-15]) = ROR7 ^ ROR18 ^ SHR3
    eor     w17, w17, \wi_2, lsr #10    // w17 = sigma1(W[i-2])  = ROR17 ^ ROR19 ^ SHR10
    add     \wi_16, \wi_16, \wi_7       // + W[i-7]
    add     \wi_16, \wi_16, w28         // + sigma0(W[i-15])
    add     \wi_16, \wi_16, w17         // + sigma1(W[i-2])
    .endm

/*
 * Macro description: performs one of the 64 compression rounds.
 *      Instead of moving values between registers after each round, callers
 *      rotate the a-h argument list by one position per round.
 * Input register:
 *      x19: Address of the round constant K[i] inside the .K256 table
 *      wi:  Message-schedule word W[i] for this round
 *      a - h: Working variables of the hash state
 * Modify the register: h d x19 w16 w17 w28 w29
 * Output register:
 *      h:   h + K[i] + W[i] + Sigma1(e) + Ch(e, f, g) + Maj(a, b, c) + Sigma0(a)
 *           (becomes "a" of the next round)
 *      d:   d + h + K[i] + W[i] + Sigma1(e) + Ch(e, f, g)
 *           (becomes "e" of the next round)
 *      x19: Advanced by 4 bytes to K[i+1]
 * Function/Macro Call: None
 */
    .macro ONE_ROUND wi, a, b, c, d, e, f, g, h
    ldr     w16, [x19], #4              // K[i], post-increment to the next constant
    and     w17, \f, \e                 // e&f
    bic     w28, \g, \e                 // g&(~e)
    add     \h, \h, w16                 // h += K[i]
    eor     w29, \e, \e, ror #14        // e ^ ROR(e, 14); rotated by 11 below
    ror     w16, \e, #6
    orr     w17, w17, w28               // Ch(e, f, g) = e&f | g&(~e)
    add     \h, \h, \wi                 // h += W[i]
    eor     w29, w16, w29, ror #11      // Sigma1(e) = ROR(e, 6) ^ ROR(e, 11) ^ ROR(e, 25)
    eor     w28, \a, \c                 // a^c
    eor     w16, \a, \b                 // a^b
    add     \h, \h, w29                 // h += Sigma1(e)
    and     w28, w28, w16               // (a^b)&(a^c)
    eor     w29, \a, \a, ror #9         // a ^ ROR(a, 9); rotated by 13 below
    add     \h, \h, w17                 // h += Ch(e, f, g)
    eor     w28, w28, \a                // Maj(a, b, c) = ((a^b)&(a^c))^a = (a&b)^(b&c)^(a&c)
    ror     w16, \a, #2
    add     \d, \d, \h                  // d += T1 (h holds T1 at this point)
    add     \h, \h, w28                 // h += Maj(a, b, c)
    eor     w29, w16, w29, ror #13      // Sigma0(a) = ROR(a, 2) ^ ROR(a, 13) ^ ROR(a, 22)
    add     \h, \h, w29                 // h += Sigma0(a)
    .endm

/*
 * Function Description: Runs the SHA-256 compression function over "num"
 *                       consecutive 64-byte blocks and updates the hash value.
 *                       Branches to SHA256CryptoExt when the CRYPT_ARM_SHA256
 *                       bit is set in the first word of g_cryptArmCpuInfo;
 *                       otherwise uses base integer instructions.
 * Function prototype: void SHA256CompressMultiBlocks(uint32_t hash[8], const uint8_t *in, uint32_t num);
 * Input register:
 *      x0: Storage address of the hash value
 *      x1: Pointer to the input data address
 *      w2: Number of 64-byte blocks; nothing is done when it is 0
 * Modify the register: x0-x17; x19-x30 are used but saved on entry and restored before return
 * Output register: None (hash[0..7] is updated in memory)
 * Function/Macro Call: UPDATE_W, ONE_ROUND, SHA256CryptoExt
 *
 * Register roles on the base-instruction path:
 *      w0-w15:  sliding window of 16 message-schedule words
 *      x16:     input pointer while a block is loaded; w16 w17 w28 w29 are
 *               scratch inside the macros (x29 is therefore not a valid frame
 *               pointer inside the loop)
 *      x19:     cursor into .K256
 *      w20-w27: working variables a-h
 *      x30:     remaining round count (num * 64)
 * Stack frame (112 bytes):
 *      [sp, #0]   x29, x30
 *      [sp, #16]  x19 - x28
 *      [sp, #96]  hash pointer
 *      [sp, #104] input pointer of the next block
 */
    .text
    .balign 16
    .global SHA256CompressMultiBlocks
    .type SHA256CompressMultiBlocks, %function
SHA256CompressMultiBlocks:
    /*
     * "num" is a 32-bit argument, so AAPCS64 leaves bits [63:32] of x2
     * unspecified. Zero-extend it once here because x2 is used as a 64-bit
     * counter below and in SHA256CryptoExt.
     */
    mov     w2, w2
    cbz     x2, .Lend_sha256
    /* If the SHA256 cryptography extension instruction is supported, use it. */
    adrp    x5, g_cryptArmCpuInfo
    ldr     w6, [x5, #:lo12:g_cryptArmCpuInfo]
    tst     w6, #CRYPT_ARM_SHA256
    bne     SHA256CryptoExt
    /* Extension instructions are not supported. Base instructions are used. */
    stp     x29, x30, [sp, #-112]!
    add     x29, sp, #0
    stp     x19, x20, [sp, #8*2]
    stp     x21, x22, [sp, #8*4]
    stp     x23, x24, [sp, #8*6]
    stp     x25, x26, [sp, #8*8]
    stp     x27, x28, [sp, #8*10]

    /* load a - h */
    ldp     w20, w21, [x0]
    ldp     w22, w23, [x0, #4*2]
    ldp     w24, w25, [x0, #4*4]
    ldp     w26, w27, [x0, #4*6]

    str     x0, [sp, #96]               // x0 is reused as W[0]; keep the hash pointer
    mov     x16, x1                     // input data address
    lsl     x30, x2, #6                 // total rounds = num * 64

    /* w0-w15 are used to record input values W[i] and temporary registers */
.Lloop_compress_64:

    /* Start a 64-round process; rounds 0-15 consume the block directly. */
    sub     x30, x30, #16
    adrp    x19, .K256
    add     x19, x19, :lo12:.K256
    /* Each ldp loads 8 bytes, i.e. the message words of two rounds. */
    ldp     w0, w1, [x16]               // load input value
    ldp     w2, w3, [x16, #4*2]
    ldp     w4, w5, [x16, #4*4]
    ldp     w6, w7, [x16, #4*6]
    ldp     w8, w9, [x16, #4*8]
    ldp     w10, w11, [x16, #4*10]
    ldp     w12, w13, [x16, #4*12]
    ldp     w14, w15, [x16, #4*14]

    add     x16, x16, #64
    str     x16, [sp, #104]             // w16 is scratch in the rounds; spill the pointer
#ifndef HITLS_BIG_ENDIAN
    /* Message words are big-endian; byte-swap them on little-endian builds. */
    rev     w0, w0
    rev     w1, w1
    rev     w2, w2
    rev     w3, w3
    rev     w4, w4
    rev     w5, w5
    rev     w6, w6
    rev     w7, w7
    rev     w8, w8
    rev     w9, w9
    rev     w10, w10
    rev     w11, w11
    rev     w12, w12
    rev     w13, w13
    rev     w14, w14
    rev     w15, w15
#endif
    /* w16 w17 w28 w29 used as a temporary register */
    ONE_ROUND w0, w20, w21, w22, w23, w24, w25, w26, w27
    ONE_ROUND w1, w27, w20, w21, w22, w23, w24, w25, w26
    ONE_ROUND w2, w26, w27, w20, w21, w22, w23, w24, w25
    ONE_ROUND w3, w25, w26, w27, w20, w21, w22, w23, w24

    ONE_ROUND w4, w24, w25, w26, w27, w20, w21, w22, w23
    ONE_ROUND w5, w23, w24, w25, w26, w27, w20, w21, w22
    ONE_ROUND w6, w22, w23, w24, w25, w26, w27, w20, w21
    ONE_ROUND w7, w21, w22, w23, w24, w25, w26, w27, w20

    ONE_ROUND w8, w20, w21, w22, w23, w24, w25, w26, w27
    ONE_ROUND w9, w27, w20, w21, w22, w23, w24, w25, w26
    ONE_ROUND w10, w26, w27, w20, w21, w22, w23, w24, w25
    ONE_ROUND w11, w25, w26, w27, w20, w21, w22, w23, w24

    ONE_ROUND w12, w24, w25, w26, w27, w20, w21, w22, w23
    ONE_ROUND w13, w23, w24, w25, w26, w27, w20, w21, w22
    ONE_ROUND w14, w22, w23, w24, w25, w26, w27, w20, w21
    ONE_ROUND w15, w21, w22, w23, w24, w25, w26, w27, w20

.Lloop_compress_16_63:
    /* Start 16-31, 32-47, 48-63 compression */
    sub     x30, x30, #16

    /* 0 */
    UPDATE_W  w0, w1, w9, w14
    ONE_ROUND w0, w20, w21, w22, w23, w24, w25, w26, w27

    /* 1 */
    UPDATE_W  w1, w2, w10, w15
    ONE_ROUND w1, w27, w20, w21, w22, w23, w24, w25, w26

    /* 2 */
    UPDATE_W  w2, w3, w11, w0
    ONE_ROUND w2, w26, w27, w20, w21, w22, w23, w24, w25

    /* 3 */
    UPDATE_W  w3, w4, w12, w1
    ONE_ROUND w3, w25, w26, w27, w20, w21, w22, w23, w24

    /* 4 */
    UPDATE_W  w4, w5, w13, w2
    ONE_ROUND w4, w24, w25, w26, w27, w20, w21, w22, w23

    /* 5 */
    UPDATE_W  w5, w6, w14, w3
    ONE_ROUND w5, w23, w24, w25, w26, w27, w20, w21, w22

    /* 6 */
    UPDATE_W  w6, w7, w15, w4
    ONE_ROUND w6, w22, w23, w24, w25, w26, w27, w20, w21

    /* 7 */
    UPDATE_W  w7, w8, w0, w5
    ONE_ROUND w7, w21, w22, w23, w24, w25, w26, w27, w20

    /* 8 */
    UPDATE_W  w8, w9, w1, w6
    ONE_ROUND w8, w20, w21, w22, w23, w24, w25, w26, w27

    /* 9 */
    UPDATE_W  w9, w10, w2, w7
    ONE_ROUND w9, w27, w20, w21, w22, w23, w24, w25, w26

    /* 10 */
    UPDATE_W  w10, w11, w3, w8
    ONE_ROUND w10, w26, w27, w20, w21, w22, w23, w24, w25

    /* 11 */
    UPDATE_W  w11, w12, w4, w9
    ONE_ROUND w11, w25, w26, w27, w20, w21, w22, w23, w24

    /* 12 */
    UPDATE_W  w12, w13, w5, w10
    ONE_ROUND w12, w24, w25, w26, w27, w20, w21, w22, w23

    /* 13 */
    UPDATE_W  w13, w14, w6, w11
    ONE_ROUND w13, w23, w24, w25, w26, w27, w20, w21, w22

    /* 14 */
    UPDATE_W  w14, w15, w7, w12
    ONE_ROUND w14, w22, w23, w24, w25, w26, w27, w20, w21

    /* 15 */
    UPDATE_W  w15, w0, w8, w13
    ONE_ROUND w15, w21, w22, w23, w24, w25, w26, w27, w20

    /*
     * x30 starts as a multiple of 64 and drops by 16 per group of 16 rounds,
     * so its low six bits are zero exactly when the 64 rounds of the current
     * block are complete. Otherwise run the next 16 rounds.
     */
    tst     x30, #63
    bne     .Lloop_compress_16_63

    /* Add the working variables a - h to the previous hash value and store it. */
    ldr     x0, [sp, #96]

    ldp     w10, w11, [x0]
    ldp     w12, w13, [x0, #4*2]
    ldp     w14, w15, [x0, #4*4]
    ldp     w16, w17, [x0, #4*6]

    add     w20, w20, w10
    add     w21, w21, w11
    add     w22, w22, w12
    add     w23, w23, w13
    stp     w20, w21, [x0]
    add     w24, w24, w14
    add     w25, w25, w15
    stp     w22, w23, [x0, #4*2]
    add     w26, w26, w16
    add     w27, w27, w17
    stp     w24, w25, [x0, #4*4]
    stp     w26, w27, [x0, #4*6]

    ldr     x16, [sp, #104]             // reload the input pointer of the next block
    /* If rounds remain, another 64-byte block is processed. */
    cbnz    x30, .Lloop_compress_64

    /* The function returns */
    ldp     x19, x20, [sp, #8*2]
    ldp     x21, x22, [sp, #8*4]
    ldp     x23, x24, [sp, #8*6]
    ldp     x25, x26, [sp, #8*8]
    ldp     x27, x28, [sp, #8*10]
    ldp     x29, x30, [sp], #112
.Lend_sha256:
    ret
    .size SHA256CompressMultiBlocks, .-SHA256CompressMultiBlocks

/*
 * Function Description: Runs the SHA-256 compression function over "num"
 *                       consecutive 64-byte blocks using the ARMv8 SHA256
 *                       instructions and updates the hash value.
 *                       File-local: only reached from SHA256CompressMultiBlocks,
 *                       which has already zero-extended x2 and rejected 0.
 * Function prototype: void SHA256CryptoExt(uint32_t hash[8], const uint8_t *in, uint32_t num);
 * Input register:
 *      x0: Storage address of the hash value
 *      x1: Pointer to the input data address
 *      x2: Number of 64-byte blocks; must be non-zero (the counter is
 *          decremented before it is tested)
 * Modify the register: x1, x2, x4, v0-v5, v16-v23
 * Output register: None (hash[0..7] is updated in memory)
 * Function/Macro Call: None
 *
 * Register roles:
 *      v4, v5:   running hash value (words 0-3 and 4-7)
 *      v0, v1:   working state of the current block
 *      v2, v3:   copy of v0 taken before sha256h, consumed by sha256h2
 *      v16-v19:  message schedule, four words per register
 *      v20-v23:  K[i..i+3] + W[i..i+3] for the upcoming four rounds
 *      x4:       cursor into .K256
 */
    .text
    .balign 16
    .type SHA256CryptoExt, %function
SHA256CryptoExt:
    ld1     {v4.4s-v5.4s}, [x0]
.Lloop_compress_64_ext:
    adrp    x4, .K256
    add     x4, x4, :lo12:.K256
    sub     x2, x2, #1
    /* 0-15 */
    ld1     {v16.16b-v19.16b}, [x1], #64

    mov     v0.16b, v4.16b
    mov     v1.16b, v5.16b

    /* rev32 reverses the bytes of each 32-bit word: message words are big-endian. */
    rev32   v16.16b, v16.16b
    ld1     {v20.4s}, [x4], #16
    rev32   v17.16b, v17.16b
    ld1     {v21.4s}, [x4], #16
    rev32   v18.16b, v18.16b
    ld1     {v22.4s}, [x4], #16

    add     v20.4s, v20.4s, v16.4s

    rev32   v19.16b, v19.16b
    ld1     {v23.4s}, [x4], #16

    sha256su0 v16.4s, v17.4s
    mov     v2.16b, v0.16b
    sha256h q0, q1, v20.4s
    sha256h2 q1, q2, v20.4s
    add     v21.4s, v21.4s, v17.4s
    sha256su1 v16.4s, v18.4s, v19.4s
    ld1     {v20.4s}, [x4], #16

    sha256su0 v17.4s, v18.4s
    mov     v3.16b, v0.16b
    sha256h q0, q1, v21.4s
    sha256h2 q1, q3, v21.4s
    add     v22.4s, v22.4s, v18.4s
    sha256su1 v17.4s, v19.4s, v16.4s
    ld1     {v21.4s}, [x4], #16

    sha256su0 v18.4s, v19.4s
    mov     v2.16b, v0.16b
    sha256h q0, q1, v22.4s
    sha256h2 q1, q2, v22.4s
    add     v23.4s, v23.4s, v19.4s
    sha256su1 v18.4s, v16.4s, v17.4s
    ld1     {v22.4s}, [x4], #16

    sha256su0 v19.4s, v16.4s
    mov     v3.16b, v0.16b
    sha256h q0, q1, v23.4s
    sha256h2 q1, q3, v23.4s
    add     v20.4s, v20.4s, v16.4s
    sha256su1 v19.4s, v17.4s, v18.4s
    ld1     {v23.4s}, [x4], #16

    /* 16-31 */
    sha256su0 v16.4s, v17.4s
    mov     v2.16b, v0.16b
    sha256h q0, q1, v20.4s
    sha256h2 q1, q2, v20.4s
    add     v21.4s, v21.4s, v17.4s
    sha256su1 v16.4s, v18.4s, v19.4s
    ld1     {v20.4s}, [x4], #16

    sha256su0 v17.4s, v18.4s
    mov     v3.16b, v0.16b
    sha256h q0, q1, v21.4s
    sha256h2 q1, q3, v21.4s
    add     v22.4s, v22.4s, v18.4s
    sha256su1 v17.4s, v19.4s, v16.4s
    ld1     {v21.4s}, [x4], #16

    mov     v2.16b, v0.16b
    sha256su0 v18.4s, v19.4s
    sha256h q0, q1, v22.4s
    sha256h2 q1, q2, v22.4s
    add     v23.4s, v23.4s, v19.4s
    sha256su1 v18.4s, v16.4s, v17.4s
    ld1     {v22.4s}, [x4], #16

    sha256su0 v19.4s, v16.4s
    mov     v3.16b, v0.16b
    sha256h q0, q1, v23.4s
    sha256h2 q1, q3, v23.4s
    add     v20.4s, v20.4s, v16.4s
    sha256su1 v19.4s, v17.4s, v18.4s
    ld1     {v23.4s}, [x4], #16

    /* 32-47 */
    sha256su0 v16.4s, v17.4s
    mov     v2.16b, v0.16b
    sha256h q0, q1, v20.4s
    sha256h2 q1, q2, v20.4s
    add     v21.4s, v21.4s, v17.4s
    sha256su1 v16.4s, v18.4s, v19.4s
    ld1     {v20.4s}, [x4], #16

    sha256su0 v17.4s, v18.4s
    mov     v3.16b, v0.16b
    sha256h q0, q1, v21.4s
    sha256h2 q1, q3, v21.4s
    add     v22.4s, v22.4s, v18.4s

    sha256su1 v17.4s, v19.4s, v16.4s
    ld1     {v21.4s}, [x4], #16

    sha256su0 v18.4s, v19.4s
    mov     v2.16b, v0.16b
    sha256h q0, q1, v22.4s
    sha256h2 q1, q2, v22.4s
    add     v23.4s, v23.4s, v19.4s
    sha256su1 v18.4s, v16.4s, v17.4s
    ld1     {v22.4s}, [x4], #16


    sha256su0 v19.4s, v16.4s
    mov     v3.16b, v0.16b
    sha256h q0, q1, v23.4s
    sha256h2 q1, q3, v23.4s
    add     v20.4s, v20.4s, v16.4s
    sha256su1 v19.4s, v17.4s, v18.4s
    ld1     {v23.4s}, [x4], #16
    /* 48-63: the schedule is complete, only K + W sums and rounds remain. */
    mov     v2.16b, v0.16b
    sha256h q0, q1, v20.4s
    add     v21.4s, v21.4s, v17.4s
    sha256h2 q1, q2, v20.4s

    mov     v3.16b, v0.16b
    sha256h q0, q1, v21.4s
    add     v22.4s, v22.4s, v18.4s
    sha256h2 q1, q3, v21.4s

    mov     v2.16b, v0.16b
    sha256h q0, q1, v22.4s
    add     v23.4s, v23.4s, v19.4s
    sha256h2 q1, q2, v22.4s

    mov     v3.16b, v0.16b
    sha256h q0, q1, v23.4s
    sha256h2 q1, q3, v23.4s
    /* Add the original hash value */
    add     v4.4s, v4.4s, v0.4s
    add     v5.4s, v5.4s, v1.4s
    cbnz    x2, .Lloop_compress_64_ext

    /* Output result */
    st1     {v4.4s-v5.4s}, [x0]
    ret
    .size SHA256CryptoExt, .-SHA256CryptoExt
#endif