/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#ifdef HITLS_CRYPTO_SHA1

#include "crypt_arm.h"

.arch    armv8-a+crypto
.extern g_cryptArmCpuInfo
.hidden g_cryptArmCpuInfo
/* SHA1 used constant value. For the data source, see the RFC3174 document.
 * K(t) = 5A827999 ( 0 <= t <= 19)
 * K(t) = 6ED9EBA1 (20 <= t <= 39)
 * K(t) = 8F1BBCDC (40 <= t <= 59)
 * K(t) = CA62C1D6 (60 <= t <= 79)
 */
.data
.balign 64  // Alignment based on the size of the read data block
.type g_k, %object
/* Scalar path: one 32-bit K per round group, read with "ldr w19, [x16, #4*n]". */
g_k:
    .long 0x5a827999
    .long 0x6ed9eba1
    .long 0x8f1bbcdc
    .long 0xca62c1d6
.size g_k, .-g_k

.balign 64  // Alignment based on the size of the read data block
.type g_kExt, %object
/* Crypto-extension path: each K replicated into four 32-bit lanes, loaded into v0-v3. */
g_kExt:
    .long 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999    //K_00_19
    .long 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1    //K_20_39
    .long 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc    //K_40_59
    .long 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6    //K_60_79
.size g_kExt, .-g_kExt

/**
 *  Macro Description: Expand one 32-bit word W[i] of the message schedule.
 *  input register:
 *      wi_3:  W[i-3]
 *      wi_8:  W[i-8]
 *      wi_14: W[i-14]
 *      wi_16: W[i-16]
 *      temp1: temporary register
 *      temp2: temporary register
 *  Modify the register: wi_16 temp1 temp2
 *  Output register:
 *      wi_16: Latest W[i] value, W(i) = S^1(W(i-3) XOR W(i-8) XOR W(i-14) XOR W(i-16))
 *  Function/Macro Call: NONE
 */
.macro  MESSAGE_EXPAND  wi_16, wi_14, wi_8, wi_3, temp1, temp2
    eor     \temp1, \wi_14, \wi_16  // W(i-14) XOR W(i-16)
    eor     \temp2, \wi_3, \wi_8    // W(i-3) XOR W(i-8)
    eor     \wi_16, \temp1, \temp2  // W(i-3) XOR W(i-8) XOR W(i-14) XOR W(i-16)
    ror     \wi_16, \wi_16, #31     // Cyclic left shift 1 equals cyclic right shift 31
.endm

/**
 *  Macro Description: Common tail of one round, updates b and e.
 *  input register:
 *      k:  Constant data
 *      wi: Message block
 *      a, b, e: Intermediate variable of hash value
 *      f:  f(B, C, D), already computed by the calling macro
 *      temp3, temp4: temporary register
 *  Modify the register: b e temp3 temp4
 *  Output register:
 *      b: Indicates the value after a cyclic update.
 *      e: Indicates the value after a cyclic update.
 *  Macro implementation:
 *      e = S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *      b = S^30(B)
 *  Function/Macro Call: NONE
 */
.macro  CAL_B_E a, b, e, wi, k, f, temp3, temp4
    add     \temp3, \wi, \k             // W(i) + K(i)
    ror     \temp4, \a, #27             // S^5(A) Cyclic shift left 5 equal Cyclic shift right 27

    ror     \b, \b, #2                  // b = S^30(B) Cyclic shift left 30 equal Cyclic shift right 2
    add     \temp4, \temp4, \temp3      // S^5(A) + W(i) + K(i)
    add     \e, \e, \f                  // f(B, C, D) + E
    add     \e, \e, \temp4              // f(B, C, D) + E + S^5(A) + W(i) + K(i)
.endm

/**
 *  Macro Description: Message compression, 0~19 round data compression
 *  input register:
 *      k:  Constant data
 *      wi: Message block
 *      a - e: Intermediate variable of hash value
 *      temp1-4: temporary register
 *  Modify the register: b e temp1-temp4
 *  Output register:
 *      b: Indicates the value after a cyclic update.
 *      e: Indicates the value after a cyclic update.
 *  Macro implementation: f(B, C, D) = (B AND C) OR ((NOT B) AND D)
 *      e = S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *      b = S^30(B)
 *  Function/Macro Call: CAL_B_E
 */
.macro  DATA_COMPRE_0_19 a, b, c, d, e, wi, k, temp1, temp2, temp3, temp4
    and     \temp1, \b, \c              // b&c
    bic     \temp2, \d, \b              // d&(~b)
    orr     \temp1, \temp1, \temp2      // f(B, C, D)

    CAL_B_E \a, \b, \e, \wi, \k, \temp1, \temp3, \temp4
.endm

/**
 *  Macro Description: Message compression, 20~39 and 60~79 round data compression
 *  input register:
 *      k:  Constant data
 *      wi: Message block
 *      a - e: Intermediate variable of hash value
 *      temp1-4: temporary register
 *  Modify the register: b e temp1-temp4
 *  Output register:
 *      b: Indicates the value after a cyclic update.
 *      e: Indicates the value after a cyclic update.
 *  Macro implementation: f(B, C, D) = B XOR C XOR D
 *      e = S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *      b = S^30(B)
 *  Function/Macro Call: CAL_B_E
 */
.macro  DATA_COMPRE_20_39_60_79 a, b, c, d, e, wi, k, temp1, temp2, temp3, temp4
    eor     \temp2, \b, \c              // b^c
    eor     \temp1, \temp2, \d          // f(B, C, D) = b^c^d

    CAL_B_E \a, \b, \e, \wi, \k, \temp1, \temp3, \temp4
.endm

/**
 *  Macro Description: Message compression, 40~59 round data compression
 *  input register:
 *      k:  Constant data
 *      wi: Message block
 *      a - e: Intermediate variable of hash value
 *      temp1-4: temporary register
 *  Modify the register: b e temp1-temp4
 *  Output register:
 *      b: Indicates the value after a cyclic update.
 *      e: Indicates the value after a cyclic update.
 *  Macro implementation: f(B, C, D) = (B AND C) OR (B AND D) OR (C AND D)
 *      e = S^5(A) + f(B, C, D) + E + W(i) + K(i)
 *      b = S^30(B)
 *  Function/Macro Call: CAL_B_E
 */
.macro  DATA_COMPRE_40_59 a, b, c, d, e, wi, k, temp1, temp2, temp3, temp4
    and     \temp1, \b, \c              // b&c
    and     \temp2, \b, \d              // b&d
    and     \temp3, \c, \d              // c&d
    orr     \temp1, \temp1, \temp2      // (b&c) or (b&d)
    orr     \temp1, \temp1, \temp3      // f(B, C, D)

    CAL_B_E \a, \b, \e, \wi, \k, \temp1, \temp3, \temp4
.endm

/**
 *  Function Description: Perform SHA1 compression calculation based on the input message and update the hash value.
 *  Function prototype: static const uint8_t *SHA1_Step(const uint8_t *input, uint32_t len, uint32_t *h)
 *  Input register:
 *         x0: Pointer to the input data address
 *         x1: Message length
 *         x2: Storage address of the hash value
 *  Register usage: w0-w15 store message blocks, x/w16, w17, w28, and w29 are temporary registers,
 *         and x30 stores the hash value address. a to e correspond to w20 to w24. w19 stores the k constant,
 *         x25 stores the message pointer, and x26 stores the remaining message length.
 *  Output register: x0 returns the address of the message for which sha1 calculation is not performed.
 *         If the length is below 64 on entry, x0 is returned unchanged and the hash is not touched.
 *  Function/Macro Call: DATA_COMPRE_0_19, DATA_COMPRE_20_39_60_79, DATA_COMPRE_40_59, MESSAGE_EXPAND, SHA1CryptoExt
 *  Pointer authentication: x30 is signed on entry (paciasp). Every return path, including the one
 *         through SHA1CryptoExt, must authenticate it (autiasp) before ret.
 *  NOTE(review): the prototype above documents len as uint32_t, but the code compares the full
 *         64-bit x1. Confirm the declared C type of len against the caller.
 */
.text
.balign 16
.global SHA1_Step
.type SHA1_Step, %function
SHA1_Step:
    .inst 0xd503233f  // paciasp
    cmp     x1, #64
    b.lo    .Lend_sha1

    /* If the SHA1 cryptography extension instruction is supported, go to. */
    adrp    x5, g_cryptArmCpuInfo
    add     x5, x5, :lo12:g_cryptArmCpuInfo
    ldr     x6, [x5]
    tst     x6, #CRYPT_ARM_SHA1
    bne     SHA1CryptoExt               // branch without link: SHA1CryptoExt returns to our caller

    /* Extended instructions are not supported, Using Base Instructions, Open up stack space, push stack protection */
    stp     x29, x30, [sp, #-96]!
    stp     x19, x20, [sp, #8*2]
    stp     x21, x22, [sp, #8*4]
    stp     x23, x24, [sp, #8*6]
    stp     x25, x26, [sp, #8*8]
    stp     x27, x28, [sp, #8*10]

    /* load a - e */
    ldp     w20, w21, [x2]
    ldp     w22, w23, [x2, #4*2]
    ldr     w24, [x2, #4*4]

    mov     x30, x2                     // x30 address for storing hash values
    mov     x25, x0                     // x25: pointer to the message
    mov     x26, x1                     // x26: stores the remaining message length.

.Lloop_sha1_compress:
    adrp    x16, g_k
    add     x16, x16, :lo12:g_k
    ldr     w19, [x16]                  // load k1

    ldp     w0, w1, [x25]               // load input value, load 64 bytes at a time
    ldp     w2, w3, [x25, #4*2]
    ldp     w4, w5, [x25, #4*4]
    ldp     w6, w7, [x25, #4*6]
    ldp     w8, w9, [x25, #4*8]
    ldp     w10, w11, [x25, #4*10]
    ldp     w12, w13, [x25, #4*12]
    ldp     w14, w15, [x25, #4*14]

    add     x25, x25, #64               // address offset: 64 bytes
    sub     x26, x26, #64               // update the remaining message length.

#ifndef  HITLS_BIG_ENDIAN
    /* Message words are big-endian; byte-swap each one on a little-endian build. */
    rev     w0, w0
    rev     w1, w1
    rev     w2, w2
    rev     w3, w3
    rev     w4, w4
    rev     w5, w5
    rev     w6, w6
    rev     w7, w7
    rev     w8, w8
    rev     w9, w9
    rev     w10, w10
    rev     w11, w11
    rev     w12, w12
    rev     w13, w13
    rev     w14, w14
    rev     w15, w15
#endif
    /* 0~19 round data compression */
    /* The a-e roles rotate through w20-w24 by one position per round instead of moving data. */
    /* a, b, c, d, e, wi, k, temp1, temp2, temp3, temp4 */
    DATA_COMPRE_0_19    w20, w21, w22, w23, w24, w0, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w24, w20, w21, w22, w23, w1, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w23, w24, w20, w21, w22, w2, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w22, w23, w24, w20, w21, w3, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w21, w22, w23, w24, w20, w4, w19, w16, w17, w28, w29

    DATA_COMPRE_0_19    w20, w21, w22, w23, w24, w5, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w24, w20, w21, w22, w23, w6, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w23, w24, w20, w21, w22, w7, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w22, w23, w24, w20, w21, w8, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w21, w22, w23, w24, w20, w9, w19, w16, w17, w28, w29

    DATA_COMPRE_0_19    w20, w21, w22, w23, w24, w10, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w24, w20, w21, w22, w23, w11, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w23, w24, w20, w21, w22, w12, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w22, w23, w24, w20, w21, w13, w19, w16, w17, w28, w29
    DATA_COMPRE_0_19    w21, w22, w23, w24, w20, w14, w19, w16, w17, w28, w29

    DATA_COMPRE_0_19    w20, w21, w22, w23, w24, w15, w19, w16, w17, w28, w29
    /* Message block extension calculation wi_16, wi_14, wi_8, wi_3, temp1, temp2 */
    /* From round 16 on, w0-w15 act as a ring buffer: W[i] overwrites W[i-16]. */
    MESSAGE_EXPAND      w0, w2, w8, w13, w16, w17
    DATA_COMPRE_0_19    w24, w20, w21, w22, w23, w0, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w1, w3, w9, w14, w16, w17
    DATA_COMPRE_0_19    w23, w24, w20, w21, w22, w1, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w2, w4, w10, w15, w16, w17
    DATA_COMPRE_0_19    w22, w23, w24, w20, w21, w2, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w3, w5, w11, w0, w16, w17
    DATA_COMPRE_0_19    w21, w22, w23, w24, w20, w3, w19, w16, w17, w28, w29

    /* 20~39 round data compression */
    /* x16 doubles as a macro temporary, so the address of g_k is recomputed for each K. */
    adrp    x16, g_k
    add     x16, x16, :lo12:g_k
    ldr     w19, [x16, #4]              // load k2
    MESSAGE_EXPAND      w4, w6, w12, w1, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w5, w7, w13, w2, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w5, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w6, w8, w14, w3, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w6, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w7, w9, w15, w4, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w7, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w8, w10, w0, w5, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w8, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w9, w11, w1, w6, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w10, w12, w2, w7, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w10, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w11, w13, w3, w8, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w11, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w12, w14, w4, w9, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w12, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w13, w15, w5, w10, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w13, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w14, w0, w6, w11, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w15, w1, w7, w12, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w15, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w0, w2, w8, w13, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w0, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w1, w3, w9, w14, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w1, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w2, w4, w10, w15, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w2, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w3, w5, w11, w0, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w3, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w4, w6, w12, w1, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w5, w7, w13, w2, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w5, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w6, w8, w14, w3, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w6, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w7, w9, w15, w4, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w7, w19, w16, w17, w28, w29

    /* 40~59 round data compression */
    adrp    x16, g_k
    add     x16, x16, :lo12:g_k
    ldr     w19, [x16, #8]              // load k3
    MESSAGE_EXPAND      w8, w10, w0, w5, w16, w17
    DATA_COMPRE_40_59   w20, w21, w22, w23, w24, w8, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w9, w11, w1, w6, w16, w17
    DATA_COMPRE_40_59   w24, w20, w21, w22, w23, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w10, w12, w2, w7, w16, w17
    DATA_COMPRE_40_59   w23, w24, w20, w21, w22, w10, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w11, w13, w3, w8, w16, w17
    DATA_COMPRE_40_59   w22, w23, w24, w20, w21, w11, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w12, w14, w4, w9, w16, w17
    DATA_COMPRE_40_59   w21, w22, w23, w24, w20, w12, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w13, w15, w5, w10, w16, w17
    DATA_COMPRE_40_59   w20, w21, w22, w23, w24, w13, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w14, w0, w6, w11, w16, w17
    DATA_COMPRE_40_59   w24, w20, w21, w22, w23, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w15, w1, w7, w12, w16, w17
    DATA_COMPRE_40_59   w23, w24, w20, w21, w22, w15, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w0, w2, w8, w13, w16, w17
    DATA_COMPRE_40_59   w22, w23, w24, w20, w21, w0, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w1, w3, w9, w14, w16, w17
    DATA_COMPRE_40_59   w21, w22, w23, w24, w20, w1, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w2, w4, w10, w15, w16, w17
    DATA_COMPRE_40_59   w20, w21, w22, w23, w24, w2, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w3, w5, w11, w0, w16, w17
    DATA_COMPRE_40_59   w24, w20, w21, w22, w23, w3, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w4, w6, w12, w1, w16, w17
    DATA_COMPRE_40_59   w23, w24, w20, w21, w22, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w5, w7, w13, w2, w16, w17
    DATA_COMPRE_40_59   w22, w23, w24, w20, w21, w5, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w6, w8, w14, w3, w16, w17
    DATA_COMPRE_40_59   w21, w22, w23, w24, w20, w6, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w7, w9, w15, w4, w16, w17
    DATA_COMPRE_40_59   w20, w21, w22, w23, w24, w7, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w8, w10, w0, w5, w16, w17
    DATA_COMPRE_40_59   w24, w20, w21, w22, w23, w8, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w9, w11, w1, w6, w16, w17
    DATA_COMPRE_40_59   w23, w24, w20, w21, w22, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w10, w12, w2, w7, w16, w17
    DATA_COMPRE_40_59   w22, w23, w24, w20, w21, w10, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w11, w13, w3, w8, w16, w17
    DATA_COMPRE_40_59   w21, w22, w23, w24, w20, w11, w19, w16, w17, w28, w29

    /* 60~79 round data compression */
    adrp    x16, g_k
    add     x16, x16, :lo12:g_k
    ldr     w19, [x16, #12]             // load k4
    MESSAGE_EXPAND      w12, w14, w4, w9, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w12, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w13, w15, w5, w10, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w13, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w14, w0, w6, w11, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w15, w1, w7, w12, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w15, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w0, w2, w8, w13, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w0, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w1, w3, w9, w14, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w1, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w2, w4, w10, w15, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w2, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w3, w5, w11, w0, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w3, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w4, w6, w12, w1, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w4, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w5, w7, w13, w2, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w5, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w6, w8, w14, w3, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w6, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w7, w9, w15, w4, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w7, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w8, w10, w0, w5, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w8, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w9, w11, w1, w6, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w9, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w10, w12, w2, w7, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w10, w19, w16, w17, w28, w29

    MESSAGE_EXPAND      w11, w13, w3, w8, w16, w17
    DATA_COMPRE_20_39_60_79    w20, w21, w22, w23, w24, w11, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w12, w14, w4, w9, w16, w17
    DATA_COMPRE_20_39_60_79    w24, w20, w21, w22, w23, w12, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w13, w15, w5, w10, w16, w17
    DATA_COMPRE_20_39_60_79    w23, w24, w20, w21, w22, w13, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w14, w0, w6, w11, w16, w17
    DATA_COMPRE_20_39_60_79    w22, w23, w24, w20, w21, w14, w19, w16, w17, w28, w29
    MESSAGE_EXPAND      w15, w1, w7, w12, w16, w17
    DATA_COMPRE_20_39_60_79    w21, w22, w23, w24, w20, w15, w19, w16, w17, w28, w29

    /* reload the previous hash value H0 - H4 */
    ldp     w0, w1, [x30]
    ldp     w2, w3, [x30, #4*2]
    ldr     w4, [x30, #4*4]

    /* H0 = H0 + A, H1 = H1 + B, H2 = H2 + C, H3 = H3 + D, H4 = H4 + E */
    add     w20, w20, w0
    add     w21, w21, w1
    add     w22, w22, w2
    add     w23, w23, w3
    add     w24, w24, w4

    /* w20-w24 now hold the updated hash and serve as a-e for the next block. */
    stp     w20, w21, [x30]
    stp     w22, w23, [x30, #4*2]
    str     w24, [x30, #4*4]

    cmp     x26, #64
    b.hs    .Lloop_sha1_compress

    /* returns the address of the message for which SHA1 calculation is not performed. */
    mov     x0, x25

    /* pop-stack */
    ldp     x19, x20, [sp, #8*2]
    ldp     x21, x22, [sp, #8*4]
    ldp     x23, x24, [sp, #8*6]
    ldp     x25, x26, [sp, #8*8]
    ldp     x27, x28, [sp, #8*10]
    ldp     x29, x30, [sp], #96

    /* Also reached directly when the length is below 64; x0 is then still the input pointer. */
.Lend_sha1:
    .inst 0xd50323bf  // autiasp
    ret
.size SHA1_Step, .-SHA1_Step

/**
 *  Function Description: Based on the input message, compress the SHA1 dedicated instruction and
 *                        update the hash value.
 *  Function prototype: static const uint8_t *SHA1CryptoExt(const uint8_t *input, uint32_t len, uint32_t *h)
 *  Input register:
 *         x0: Pointer to the input data address
 *         x1: Message length
 *         x2: Storage address of the hash value
 *  Register usage: v0-v3 store k0-k3, s5 and s7 alternately hold e, v6 stores the working abcd,
 *         v23-v26 store w0-w15 and are recycled for w16-w79. v19-v22 store w+k calculation results.
 *         v16 is used as the 0 register. v17 stores the accumulated abcd and v18 the accumulated e.
 *         v16 is added to v17/v18 to copy them back into v6/v7 for the next block.
 *  Output register: x0 returns the address of the message for which sha1 calculation is not performed.
 *  Entry condition: reached by a plain branch from SHA1_Step after paciasp has signed x30,
 *         with sp unchanged, so x30 is authenticated with autiasp before ret.
 *  Function/Macro Call: NONE
 */
.text
.balign 16
.type SHA1CryptoExt, %function
SHA1CryptoExt:
    /* load k */
    adrp    x3, g_kExt
    add     x3, x3, :lo12:g_kExt
    ld1     {v0.4s-v3.4s}, [x3]

    /* load a - e */
    ld1     {v17.4s}, [x2]
    ld1     {v6.4s}, [x2], #16
    ld1     {v18.s}[0], [x2]
    ld1     {v7.s}[0], [x2]
    sub     x2, x2, #16                 // restore x2 to the start of the hash value

    eor     v16.16b, v16.16b, v16.16b   // v16 = 0

.Lloop_sha1_ext_compress:

    /* load w */
    ld1     {v23.4s-v26.4s}, [x0], #64
    sub     x1, x1, #64                 // update the remaining message length.

    /* little endian inversion */

#ifndef  HITLS_BIG_ENDIAN
    rev32   v23.16b, v23.16b
    rev32   v24.16b, v24.16b
    rev32   v25.16b, v25.16b
    rev32   v26.16b, v26.16b
#endif

    add     v19.4s, v0.4s, v23.4s       // k0+w[3:0]
    add     v20.4s, v0.4s, v24.4s       // k0+w[7:4]
    add     v21.4s, v0.4s, v25.4s       // k0+w[11:8]
    add     v22.4s, v0.4s, v26.4s       // k0+w[15:12]

    /* rounds 0~15 data compression, each group of four instructions handles four rounds */
    sha1su0 v23.4s, v24.4s, v25.4s      // start of w[19:16]
    sha1h   s5, s6                      // next e derived from current a
    sha1c   q6, s7, v19.4s              // a, b, c, d -> a, b, c, d
    sha1su1 v23.4s, v26.4s              // finish w[19:16]

    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1c   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1c   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1c   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v0.4s, v23.4s       // k0+w[19:16]
    add     v20.4s, v1.4s, v24.4s       // k1+w[23:20]
    add     v21.4s, v1.4s, v25.4s       // k1+w[27:24]
    add     v22.4s, v1.4s, v26.4s       // k1+w[31:28]

    /* rounds 16~19 data compression */
    sha1su0 v23.4s, v24.4s, v25.4s
    sha1h   s5, s6
    sha1c   q6, s7, v19.4s
    sha1su1 v23.4s, v26.4s

    /* rounds 20~39 data compression */
    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1p   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1p   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1p   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v1.4s, v23.4s       // k1+w[35:32]
    add     v20.4s, v1.4s, v24.4s       // k1+w[39:36]
    add     v21.4s, v2.4s, v25.4s       // k2+w[43:40]
    add     v22.4s, v2.4s, v26.4s       // k2+w[47:44]

    sha1su0 v23.4s, v24.4s, v25.4s
    sha1h   s5, s6
    sha1p   q6, s7, v19.4s
    sha1su1 v23.4s, v26.4s

    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1p   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    /* rounds 40~59 data compression */
    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1m   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1m   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v2.4s, v23.4s       // k2+w[51:48]
    add     v20.4s, v2.4s, v24.4s       // k2+w[55:52]
    add     v21.4s, v2.4s, v25.4s       // k2+w[59:56]
    add     v22.4s, v3.4s, v26.4s       // k3+w[63:60]

    sha1su0 v23.4s, v24.4s, v25.4s
    sha1h   s5, s6
    sha1m   q6, s7, v19.4s
    sha1su1 v23.4s, v26.4s

    sha1su0 v24.4s, v25.4s, v26.4s
    sha1h   s7, s6
    sha1m   q6, s5, v20.4s
    sha1su1 v24.4s, v23.4s

    sha1su0 v25.4s, v26.4s, v23.4s
    sha1h   s5, s6
    sha1m   q6, s7, v21.4s
    sha1su1 v25.4s, v24.4s

    /* rounds 60~79 data compression */
    sha1su0 v26.4s, v23.4s, v24.4s
    sha1h   s7, s6
    sha1p   q6, s5, v22.4s
    sha1su1 v26.4s, v25.4s

    add     v19.4s, v3.4s, v23.4s       // k3+w[67:64]
    add     v20.4s, v3.4s, v24.4s       // k3+w[71:68]
    add     v21.4s, v3.4s, v25.4s       // k3+w[75:72]
    add     v22.4s, v3.4s, v26.4s       // k3+w[79:76]

    /* The schedule is complete; only the compression steps remain. */
    sha1h   s5, s6
    sha1p   q6, s7, v19.4s

    sha1h   s7, s6
    sha1p   q6, s5, v20.4s

    sha1h   s5, s6
    sha1p   q6, s7, v21.4s

    sha1h   s7, s6
    sha1p   q6, s5, v22.4s

    /* calculate H0 H1 H2 H3 H4 */
    add     v17.4s, v17.4s, v6.4s
    add     v18.4s, v18.4s, v7.4s

    /* adding the zero register copies the new hash into the working registers */
    add     v6.4s, v17.4s, v16.4s
    add     v7.4s, v18.4s, v16.4s

    cmp     x1, #64
    b.hs    .Lloop_sha1_ext_compress

    /* the hash is written back once, after the last full block */
    st1     {v17.4s}, [x2], #16
    st1     {v18.s}[0], [x2]

    /* x30 was signed by the paciasp at SHA1_Step entry; authenticate it before returning. */
    .inst 0xd50323bf  // autiasp
    ret
.size SHA1CryptoExt, .-SHA1CryptoExt

#endif