/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#ifdef HITLS_CRYPTO_SM3

/*
 * SM3 compression function for x86-64 (GNU as, AT&T operand order).
 *
 * Calling convention as used by the code below: arguments arrive in
 * rdi / rsi / rdx, and rbx, rbp, r12-r15 are saved and restored, which
 * matches the System V AMD64 ABI.
 *
 * Instruction-set extensions used:
 *   - BMI1  (andn)                    both routines
 *   - BMI2  (rorx)                    both routines
 *   - MOVBE (movbe)                   SM3_CompressAsm only
 *   - AVX   (VEX-encoded xmm ops)     SM3_CompressSIMD only
 * NOTE(review): nothing in this file checks CPU features; presumably the
 * caller dispatches on CPUID before selecting a routine - confirm.
 *
 * Stack frame shared by both routines (348 bytes, offsets from rsp):
 *   [  0, 272)  W[0..67], the expanded message words
 *   [272, 284)  unused
 *   [284, 300)  staging area for W'(j) = W(j) ^ W(j+4)
 *   [300, 348)  saved rbx, rbp, r12, r13, r14, r15
 * No function is called from here, so the frame is not kept 16-byte
 * aligned; every vector access to it uses vmovdqu.
 */

.file "sm3_x86_64.s"
.text

/* Working state words A..H of the SM3 round function. */
.set A,%r8d
.set B,%r9d
.set C,%r10d
.set D,%r11d
.set E,%r12d
.set F,%r13d
.set G,%r14d
.set H,%r15d

/* Incoming arguments. NUM32 is the 32-bit view of NUM. */
.set STATE,%rdi
.set DATA,%rsi
.set NUM,%rdx
.set NUM32,%edx

/*
 * Scratch registers. BOOL_OUT and SS2 both alias eax, and SS1 aliases ebx,
 * so the helper macros below overwrite eax/ebx freely.
 */
.set ADDR,%rax
.set BOOL_OUT,%eax
.set SS1,%ebx
.set SS2,%eax

.set X0,%xmm0
.set X1,%xmm1
.set X2,%xmm2
.set X3,%xmm3
.set X4,%xmm4
.set X5,%xmm5
.set X6,%xmm6
.set X7,%xmm7
.set R16,%xmm13
.set R24,%xmm14
.set SHUFFLEMASK,%xmm15

/* FF0: BOOL_OUT (eax) <- X ^ Y ^ Z. Writes eax only. */
.macro FF0 X Y Z
    movl \X,%eax
    xorl \Y,%eax
    xorl \Z,%eax
.endm

/*
 * FF1: BOOL_OUT (eax) <- (X & Y) | (X & Z) | (Y & Z),
 * evaluated as (X & (Y | Z)) | (Y & Z). Writes eax and ebx.
 */
.macro FF1 X Y Z
    movl \Y,%eax
    movl %eax,%ebx
    orl \Z,%eax                 /* eax = Y | Z */
    andl \Z,%ebx                /* ebx = Y & Z */
    andl \X,%eax                /* eax = X & (Y | Z) */
    orl %ebx,%eax
.endm

/* GG0: same boolean function as FF0. */
.macro GG0 X Y Z
    FF0 \X \Y \Z
.endm

/* GG1: BOOL_OUT (eax) <- (X & Y) | (~X & Z). Writes eax and ebx. */
.macro GG1 X Y Z
    movl \X,%ebx
    andn \Z,%ebx,%eax           /* eax = ~X & Z */
    andl \Y,%ebx                /* ebx =  X & Y */
    orl %ebx,%eax
.endm

/* P0: X <- X ^ (X <<< 9) ^ (X <<< 17), in place. Writes eax and ebx. */
.macro P0 X
    rorx $15,\X,%eax            /* X <<< 17 */
    rorx $23,\X,%ebx            /* X <<< 9 */
    xorl %eax,\X
    xorl %ebx,\X
.endm

/* P1: X <- X ^ (X <<< 15) ^ (X <<< 23), in place. Writes eax and ebx. */
.macro P1 X
    rorx $9,\X,%eax             /* X <<< 23 */
    rorx $17,\X,%ebx            /* X <<< 15 */
    xorl %eax,\X
    xorl %ebx,\X
.endm

/*
 * ROUND: one SM3 round on the eight state registers.
 *
 * On entry Dr must already hold D + W'(j) and Hr must hold H + W(j);
 * ROUND_00_15 / ROUND_16_63 below perform those additions.
 * TJ is the round constant already rotated left by (j mod 32).
 *
 * On exit:
 *   Dr = TT1 = FF(A,B,C) + D + SS2 + W'(j)
 *   Hr = P0(TT2), with TT2 = GG(E,F,G) + H + SS1 + W(j)
 *   Br = B <<< 9, Fr = F <<< 19
 * The registers are not moved; the caller renames them by permuting the
 * argument list on the next invocation. Writes eax and ebx.
 */
.macro ROUND FF GG Ar Br Cr Dr Er Fr Gr Hr TJ
    rorx $20,\Ar,%eax           /* eax = A <<< 12 */
    movl %eax,%ebx
    addl \Er,%ebx
    addl $\TJ,%ebx
    rorx $25,%ebx,SS1           /* SS1 = ((A <<< 12) + E + TJ) <<< 7 */
    xorl SS1,SS2                /* SS2 = SS1 ^ (A <<< 12) */
    addl SS2,\Dr
    addl SS1,\Hr
    \FF \Ar \Br \Cr             /* FF(A,B,C) */
    addl BOOL_OUT,\Dr
    \GG \Er \Fr \Gr             /* GG(E,F,G) */
    addl BOOL_OUT,\Hr
    rorx $23,\Br,\Br            /* B <- B <<< 9 */
    rorx $13,\Fr,\Fr            /* F <- F <<< 19 */
    P0 \Hr                      /* P0(TT2) */
.endm

/*
 * ROUND_00_15: round using FF0/GG0.
 * WADDR / WPADDR are rsp-relative byte offsets of W(j) and W'(j).
 */
.macro ROUND_00_15 Ar Br Cr Dr Er Fr Gr Hr TJ WADDR WPADDR
    addl \WADDR(%rsp),\Hr       /* H <- H + W(j) */
    addl \WPADDR(%rsp),\Dr      /* D <- D + W'(j) */
    ROUND FF0 GG0 \Ar \Br \Cr \Dr \Er \Fr \Gr \Hr \TJ
.endm

/* ROUND_16_63: same as ROUND_00_15 but with FF1/GG1. */
.macro ROUND_16_63 Ar Br Cr Dr Er Fr Gr Hr TJ WADDR WPADDR
    addl \WADDR(%rsp),\Hr       /* H <- H + W(j) */
    addl \WPADDR(%rsp),\Dr      /* D <- D + W'(j) */
    ROUND FF1 GG1 \Ar \Br \Cr \Dr \Er \Fr \Gr \Hr \TJ
.endm

/*
 * ROTATE: OUT <- (IN << LEFT) ^ (IN >> RIGHT) in every 32-bit lane,
 * i.e. a left rotation by LEFT when LEFT + RIGHT = 32.
 * Writes xmm6 and xmm7 (IN may be xmm7; it is read before being replaced).
 */
.macro ROTATE IN OUT LEFT RIGHT
    vpslld $\LEFT,\IN,%xmm6
    vpsrld $\RIGHT,\IN,%xmm7
    vpxor %xmm6,%xmm7,\OUT
.endm

/*
 * WORD_SCHEDULER_00_11: scalar W'(j) for rounds that need no expansion.
 * I is the byte offset of W(j); the result W(j) ^ W(j+4) is stored in the
 * staging slot at 284(rsp). Writes ecx.
 */
.macro WORD_SCHEDULER_00_11 I
    movl \I(%rsp), %ecx         /* W(j) */
    xorl \I+4*4(%rsp),%ecx      /* W(j) ^ W(j+4) */
    movl %ecx,284(%rsp)
.endm

/*
 * WORD_SCHEDULER_12_63: scalar message expansion, one word per call.
 * With I the byte offset of W(i-16), computes
 *   W(i) = P1(W(i-16) ^ W(i-9) ^ (W(i-3) <<< 15)) ^ (W(i-13) <<< 7) ^ W(i-6)
 * stores it at I+64, and stores W'(i-4) = W(i-4) ^ W(i) in the staging
 * slot at 284(rsp). Writes eax, ebx and ecx.
 */
.macro WORD_SCHEDULER_12_63 I
    rorx $17,\I+13*4(%rsp),%ecx /* W(i-3) <<< 15 */
    xorl \I(%rsp),%ecx          /* ^ W(i-16) */
    xorl \I+7*4(%rsp),%ecx      /* ^ W(i-9) */
    P1 %ecx
    rorx $25,\I+3*4(%rsp),%eax  /* W(i-13) <<< 7 */
    xorl \I+10*4(%rsp),%eax     /* ^ W(i-6) */
    xorl %eax,%ecx
    movl %ecx,\I+16*4(%rsp)     /* store W(i) */
    xorl \I+12*4(%rsp),%ecx     /* W(i-4) ^ W(i) */
    movl %ecx,284(%rsp)         /* store W'(i-4) */
.endm

/*
 * LOAD_WORD_FOR_SCHEDULER: load all six input vectors of MESSAGE_SCHEDULER
 * from the W array. With START the byte offset of W(i-16):
 *   X0 = W(i-16..), X1 = W(i-13..), X2 = W(i-9..),
 *   X3 = W(i-6..),  X4 = W(i-4..),  X5 = W(i-3..)
 */
.macro LOAD_WORD_FOR_SCHEDULER START
    vmovdqu \START(%rsp),X0
    vmovdqu \START+12(%rsp),X1
    vmovdqu \START+28(%rsp),X2
    vmovdqu \START+40(%rsp),X3
    vmovdqu \START+48(%rsp),X4
    vmovdqu \START+52(%rsp),X5
.endm

/*
 * LOAD_WORD_FOR_SCHEDULER_FAST: load only W1, W4 and W5. W0, W2 and W3 are
 * expected to be still live in registers from the previous scheduler step;
 * the call sites rotate the register arguments accordingly.
 */
.macro LOAD_WORD_FOR_SCHEDULER_FAST START W0 W1 W2 W3 W4 W5
    vmovdqu \START+12(%rsp),\W1
    vmovdqu \START+48(%rsp),\W4
    vmovdqu \START+52(%rsp),\W5
.endm

/*
 * MESSAGE_SCHEDULER: vector form of the message expansion.
 * Inputs are the six vectors described at LOAD_WORD_FOR_SCHEDULER.
 * Stores the new W vector at START+64 and the W' vector at 284(rsp).
 * Overwrites W0, W2, W4, xmm6 and xmm7; W1, W3 and W5 are left intact.
 * Callers advance START by 12 bytes (three words) per invocation.
 * NOTE(review): presumably only the low three lanes are meaningful, since
 * lane 3 would need a W(i-3) produced by this same step - confirm.
 */
.macro MESSAGE_SCHEDULER START W0 W1 W2 W3 W4 W5
    vpxor \W2,\W0,\W0           /* W(i-16) ^ W(i-9) */
    ROTATE \W5,\W2,15,17        /* W(i-3) <<< 15 */
    vpxor \W2,\W0,\W0

    /*
     * P1(x) = x ^ (x <<< 15) ^ (x <<< 23). The two rotations are built as
     * ((x <<< 16) ^ (x <<< 24)) <<< 31, using byte shuffles for 16 and 24.
     */
    vpshufb R16,\W0,X6
    vpshufb R24,\W0,X7
    vpxor X6,X7,X7
    ROTATE X7,X7,31,1
    vpxor X7,\W0,\W0
    ROTATE \W1,\W2,7,25         /* W(i-13) <<< 7 */
    vpxor \W2,\W0,\W0
    vpxor \W3,\W0,\W0           /* ^ W(i-6) */
    vpxor \W0,\W4,\W4           /* W'(i-4) = W(i-4) ^ W(i) */

    vmovdqu \W0,\START+64(%rsp)
    vmovdqu \W4,284(%rsp)
.endm

/* MESSAGE_SCHEDULER_FAST: partial reload followed by MESSAGE_SCHEDULER. */
.macro MESSAGE_SCHEDULER_FAST START W0 W1 W2 W3 W4 W5
    LOAD_WORD_FOR_SCHEDULER_FAST \START \W0 \W1 \W2 \W3 \W4 \W5
    MESSAGE_SCHEDULER \START \W0 \W1 \W2 \W3 \W4 \W5
.endm

/*
 * FRAME_ENTER: allocate the 348-byte frame and save rbx, rbp, r12-r15
 * at offsets 300..347. rbp is saved although the visible code never
 * modifies it.
 */
.macro FRAME_ENTER
    subq $348,%rsp
    movq %rbx,300(%rsp)
    movq %rbp,8+300(%rsp)
    movq %r12,16+300(%rsp)
    movq %r13,24+300(%rsp)
    movq %r14,32+300(%rsp)
    movq %r15,40+300(%rsp)
.endm

/*
 * FRAME_LEAVE: zero r8-r11 (state words A..D), restore the saved
 * registers (which replaces E..H in r12-r15) and release the frame.
 * NOTE(review): the expanded message words in the stack frame and the
 * scratch values in rax/rcx are not wiped here - confirm whether that is
 * acceptable for callers hashing secret data.
 */
.macro FRAME_LEAVE
    xorq %r8,%r8
    xorq %r9,%r9
    xorq %r10,%r10
    xorq %r11,%r11
    movq 300(%rsp),%rbx
    movq 8+300(%rsp),%rbp
    movq 16+300(%rsp),%r12
    movq 24+300(%rsp),%r13
    movq 32+300(%rsp),%r14
    movq 40+300(%rsp),%r15
    addq $348,%rsp
.endm


/*
 * void SM3_CompressSIMD(uint32_t state[8], const uint8_t *data, uint32_t blockCnt)
 *
 *   rdi  state     eight 32-bit chaining words, updated in place
 *   rsi  data      input, consumed in 64-byte blocks
 *   edx  blockCnt  number of 64-byte blocks; 0 returns immediately
 *
 * AVX variant: the message schedule is computed three words at a time
 * with xmm registers, the rounds themselves are scalar.
 * Writes rax, rbx (restored), rcx, rdx, rsi, r8-r15 (r12-r15 restored),
 * all xmm registers (zeroed by vzeroall on exit) and flags.
 */
.globl SM3_CompressSIMD
.type SM3_CompressSIMD, @function
.align 64
SM3_CompressSIMD:
    /*
     * blockCnt is declared as a 32-bit argument, so bits 63:32 of rdx are
     * unspecified on entry; zero-extend before the 64-bit test/decrement.
     */
    movl NUM32,NUM32
    testq NUM,NUM
    jz .Lsm3_avx_ret

    FRAME_ENTER

.Lsm3_avx_init:
    /* Shuffle constants; MASKS is 64-byte aligned, so vmovdqa is safe. */
    leaq MASKS(%rip),ADDR
    vmovdqa (ADDR),SHUFFLEMASK
    vmovdqa 16(ADDR),R16
    vmovdqa 32(ADDR),R24

.Lsm3_avx_update:
    /* Load one block and byte-swap each 32-bit word (input is big endian). */
    vmovdqu (DATA),%xmm0
    vmovdqu 16(DATA),%xmm1
    vmovdqu 32(DATA),%xmm2
    vmovdqu 48(DATA),%xmm3
    vpshufb SHUFFLEMASK,%xmm0,%xmm0
    vpshufb SHUFFLEMASK,%xmm1,%xmm1
    vpshufb SHUFFLEMASK,%xmm2,%xmm2
    vpshufb SHUFFLEMASK,%xmm3,%xmm3
    vmovdqu %xmm0,(%rsp)
    vmovdqu %xmm1,16(%rsp)
    vmovdqu %xmm2,32(%rsp)
    vmovdqu %xmm3,48(%rsp)
    /* xmm0..xmm2 <- W'(0..3), W'(4..7), W'(8..11) */
    vpxor %xmm1,%xmm0,%xmm0
    vpxor %xmm2,%xmm1,%xmm1
    vpxor %xmm3,%xmm2,%xmm2

    /* Load the chaining state. */
    movl (STATE),A
    movl 4(STATE),B
    movl 8(STATE),C
    movl 12(STATE),D
    movl 16(STATE),E
    movl 20(STATE),F
    movl 24(STATE),G
    movl 28(STATE),H

    /* Rounds 0-11: four W' words are staged at 284..299 per group. */
    vmovdqu %xmm0,284(%rsp)
    ROUND_00_15 A B C D E F G H 0x79CC4519 0 284
    ROUND_00_15 D A B C H E F G 0xF3988A32 4 288
    ROUND_00_15 C D A B G H E F 0xE7311465 8 292
    ROUND_00_15 B C D A F G H E 0xCE6228CB 12 296
    vmovdqu %xmm1,284(%rsp)
    ROUND_00_15 A B C D E F G H 0x9CC45197 16 284
    ROUND_00_15 D A B C H E F G 0x3988A32F 20 288
    ROUND_00_15 C D A B G H E F 0x7311465E 24 292
    ROUND_00_15 B C D A F G H E 0xE6228CBC 28 296
    vmovdqu %xmm2,284(%rsp)
    ROUND_00_15 A B C D E F G H 0xCC451979 32 284
    ROUND_00_15 D A B C H E F G 0x988A32F3 36 288
    ROUND_00_15 C D A B G H E F 0x311465E7 40 292
    ROUND_00_15 B C D A F G H E 0x6228CBCE 44 296

    /* Rounds 12-15: from here on each scheduler step feeds three rounds. */
    LOAD_WORD_FOR_SCHEDULER 0
    MESSAGE_SCHEDULER 0 X0 X1 X2 X3 X4 X5
    ROUND_00_15 A B C D E F G H 0xC451979C 48 284
    ROUND_00_15 D A B C H E F G 0x88A32F39 52 288
    ROUND_00_15 C D A B G H E F 0x11465E73 56 292
    MESSAGE_SCHEDULER_FAST 12 X1 X0 X3 X5 X4 X2
    ROUND_00_15 B C D A F G H E 0x228CBCE6 60 284

    /* Rounds 16-63 */
    ROUND_16_63 A B C D E F G H 0x9D8A7A87 64 288
    ROUND_16_63 D A B C H E F G 0x3B14F50F 68 292
    MESSAGE_SCHEDULER_FAST 24 X0 X1 X5 X2 X4 X3
    ROUND_16_63 C D A B G H E F 0x7629EA1E 72 284
    ROUND_16_63 B C D A F G H E 0xEC53D43C 76 288
    ROUND_16_63 A B C D E F G H 0xD8A7A879 80 292
    MESSAGE_SCHEDULER_FAST 36 X1 X0 X2 X3 X4 X5
    ROUND_16_63 D A B C H E F G 0xB14F50F3 84 284
    ROUND_16_63 C D A B G H E F 0x629EA1E7 88 288
    ROUND_16_63 B C D A F G H E 0xC53D43CE 92 292
    MESSAGE_SCHEDULER_FAST 48 X0 X1 X3 X5 X4 X2
    ROUND_16_63 A B C D E F G H 0x8A7A879D 96 284
    ROUND_16_63 D A B C H E F G 0x14F50F3B 100 288
    ROUND_16_63 C D A B G H E F 0x29EA1E76 104 292
    MESSAGE_SCHEDULER_FAST 60 X1 X0 X5 X2 X4 X3
    ROUND_16_63 B C D A F G H E 0x53D43CEC 108 284
    ROUND_16_63 A B C D E F G H 0xA7A879D8 112 288
    ROUND_16_63 D A B C H E F G 0x4F50F3B1 116 292
    MESSAGE_SCHEDULER_FAST 72 X0 X1 X2 X3 X4 X5
    ROUND_16_63 C D A B G H E F 0x9EA1E762 120 284
    ROUND_16_63 B C D A F G H E 0x3D43CEC5 124 288
    ROUND_16_63 A B C D E F G H 0x7A879D8A 128 292
    MESSAGE_SCHEDULER_FAST 84 X1 X0 X3 X5 X4 X2
    ROUND_16_63 D A B C H E F G 0xF50F3B14 132 284
    ROUND_16_63 C D A B G H E F 0xEA1E7629 136 288
    ROUND_16_63 B C D A F G H E 0xD43CEC53 140 292
    MESSAGE_SCHEDULER_FAST 96 X0 X1 X5 X2 X4 X3
    ROUND_16_63 A B C D E F G H 0xA879D8A7 144 284
    ROUND_16_63 D A B C H E F G 0x50F3B14F 148 288
    ROUND_16_63 C D A B G H E F 0xA1E7629E 152 292
    MESSAGE_SCHEDULER_FAST 108 X1 X0 X2 X3 X4 X5
    ROUND_16_63 B C D A F G H E 0x43CEC53D 156 284
    ROUND_16_63 A B C D E F G H 0x879D8A7A 160 288
    ROUND_16_63 D A B C H E F G 0x0F3B14F5 164 292
    MESSAGE_SCHEDULER_FAST 120 X0 X1 X3 X5 X4 X2
    ROUND_16_63 C D A B G H E F 0x1E7629EA 168 284
    ROUND_16_63 B C D A F G H E 0x3CEC53D4 172 288
    ROUND_16_63 A B C D E F G H 0x79D8A7A8 176 292
    MESSAGE_SCHEDULER_FAST 132 X1 X0 X5 X2 X4 X3
    ROUND_16_63 D A B C H E F G 0xF3B14F50 180 284
    ROUND_16_63 C D A B G H E F 0xE7629EA1 184 288
    ROUND_16_63 B C D A F G H E 0xCEC53D43 188 292
    MESSAGE_SCHEDULER_FAST 144 X0 X1 X2 X3 X4 X5
    ROUND_16_63 A B C D E F G H 0x9D8A7A87 192 284
    ROUND_16_63 D A B C H E F G 0x3B14F50F 196 288
    ROUND_16_63 C D A B G H E F 0x7629EA1E 200 292
    MESSAGE_SCHEDULER_FAST 156 X1 X0 X3 X5 X4 X2
    ROUND_16_63 B C D A F G H E 0xEC53D43C 204 284
    ROUND_16_63 A B C D E F G H 0xD8A7A879 208 288
    ROUND_16_63 D A B C H E F G 0xB14F50F3 212 292
    MESSAGE_SCHEDULER_FAST 168 X0 X1 X5 X2 X4 X3
    ROUND_16_63 C D A B G H E F 0x629EA1E7 216 284
    ROUND_16_63 B C D A F G H E 0xC53D43CE 220 288
    ROUND_16_63 A B C D E F G H 0x8A7A879D 224 292
    MESSAGE_SCHEDULER_FAST 180 X1 X0 X2 X3 X4 X5
    ROUND_16_63 D A B C H E F G 0x14F50F3B 228 284
    ROUND_16_63 C D A B G H E F 0x29EA1E76 232 288
    ROUND_16_63 B C D A F G H E 0x53D43CEC 236 292
    MESSAGE_SCHEDULER_FAST 192 X0 X1 X3 X5 X4 X2
    ROUND_16_63 A B C D E F G H 0xA7A879D8 240 284
    ROUND_16_63 D A B C H E F G 0x4F50F3B1 244 288
    ROUND_16_63 C D A B G H E F 0x9EA1E762 248 292
    /* The last round needs a single extra word, produced by the scalar scheduler. */
    WORD_SCHEDULER_12_63 204
    ROUND_16_63 B C D A F G H E 0x3D43CEC5 252 284

    /* Feed-forward: state <- state ^ (A..H) */
    xorl A,(STATE)
    xorl B,4(STATE)
    xorl C,8(STATE)
    xorl D,12(STATE)
    xorl E,16(STATE)
    xorl F,20(STATE)
    xorl G,24(STATE)
    xorl H,28(STATE)

    leaq 64(DATA),DATA
    decq NUM
    jnz .Lsm3_avx_update

.Lsm3_avx_final:
    vzeroall                    /* zero every vector register before returning */
    FRAME_LEAVE

.Lsm3_avx_ret:
    ret
.size SM3_CompressSIMD, .-SM3_CompressSIMD

/*
 * void SM3_CompressAsm(uint32_t state[8], const uint8_t *data, uint32_t blockCnt)
 *
 *   rdi  state     eight 32-bit chaining words, updated in place
 *   rsi  data      input, consumed in 64-byte blocks
 *   edx  blockCnt  number of 64-byte blocks; 0 returns immediately
 *
 * Scalar variant: one message word is expanded before every round.
 * Writes rax, rbx (restored), rcx, rdx, rsi, r8-r15 (r12-r15 restored)
 * and flags.
 */
.globl SM3_CompressAsm
.type SM3_CompressAsm, @function
.align 64
SM3_CompressAsm:
    /*
     * blockCnt is declared as a 32-bit argument, so bits 63:32 of rdx are
     * unspecified on entry; zero-extend before the 64-bit test/decrement.
     */
    movl NUM32,NUM32
    testq NUM,NUM
    jz .Lsm3_ret

    FRAME_ENTER

.Lsm3_loop:
    /*
     * Copy one block into W[0..15], byte-swapping each word with movbe
     * (input is big endian). r8d-r11d are free here: the state is loaded
     * into them only afterwards.
     */
    movl (DATA),%r8d
    movl 4(DATA),%r9d
    movl 8(DATA),%r10d
    movl 12(DATA),%r11d
    movbe %r8d,(%rsp)
    movbe %r9d,4(%rsp)
    movbe %r10d,8(%rsp)
    movbe %r11d,12(%rsp)
    movl 16(DATA),%r8d
    movl 20(DATA),%r9d
    movl 24(DATA),%r10d
    movl 28(DATA),%r11d
    movbe %r8d,16(%rsp)
    movbe %r9d,20(%rsp)
    movbe %r10d,24(%rsp)
    movbe %r11d,28(%rsp)
    movl 32(DATA),%r8d
    movl 36(DATA),%r9d
    movl 40(DATA),%r10d
    movl 44(DATA),%r11d
    movbe %r8d,32(%rsp)
    movbe %r9d,36(%rsp)
    movbe %r10d,40(%rsp)
    movbe %r11d,44(%rsp)
    movl 48(DATA),%r8d
    movl 52(DATA),%r9d
    movl 56(DATA),%r10d
    movl 60(DATA),%r11d
    movbe %r8d,48(%rsp)
    movbe %r9d,52(%rsp)
    movbe %r10d,56(%rsp)
    movbe %r11d,60(%rsp)

    /* Load the chaining state. */
    movl (STATE),A
    movl 4(STATE),B
    movl 8(STATE),C
    movl 12(STATE),D
    movl 16(STATE),E
    movl 20(STATE),F
    movl 24(STATE),G
    movl 28(STATE),H

    /* Rounds 0-11: W' comes straight from the loaded block. */
    WORD_SCHEDULER_00_11 0
    ROUND_00_15 A B C D E F G H 0x79CC4519 0 284
    WORD_SCHEDULER_00_11 4
    ROUND_00_15 D A B C H E F G 0xF3988A32 4 284
    WORD_SCHEDULER_00_11 8
    ROUND_00_15 C D A B G H E F 0xE7311465 8 284
    WORD_SCHEDULER_00_11 12
    ROUND_00_15 B C D A F G H E 0xCE6228CB 12 284
    WORD_SCHEDULER_00_11 16
    ROUND_00_15 A B C D E F G H 0x9CC45197 16 284
    WORD_SCHEDULER_00_11 20
    ROUND_00_15 D A B C H E F G 0x3988A32F 20 284
    WORD_SCHEDULER_00_11 24
    ROUND_00_15 C D A B G H E F 0x7311465E 24 284
    WORD_SCHEDULER_00_11 28
    ROUND_00_15 B C D A F G H E 0xE6228CBC 28 284
    WORD_SCHEDULER_00_11 32
    ROUND_00_15 A B C D E F G H 0xCC451979 32 284
    WORD_SCHEDULER_00_11 36
    ROUND_00_15 D A B C H E F G 0x988A32F3 36 284
    WORD_SCHEDULER_00_11 40
    ROUND_00_15 C D A B G H E F 0x311465E7 40 284
    WORD_SCHEDULER_00_11 44
    ROUND_00_15 B C D A F G H E 0x6228CBCE 44 284

    /* Rounds 12-15: each scheduler call expands W(j+4) for round j. */
    WORD_SCHEDULER_12_63 0
    ROUND_00_15 A B C D E F G H 0xC451979C 48 284
    WORD_SCHEDULER_12_63 4
    ROUND_00_15 D A B C H E F G 0x88A32F39 52 284
    WORD_SCHEDULER_12_63 8
    ROUND_00_15 C D A B G H E F 0x11465E73 56 284
    WORD_SCHEDULER_12_63 12
    ROUND_00_15 B C D A F G H E 0x228CBCE6 60 284

    /* Rounds 16-63 */
    WORD_SCHEDULER_12_63 16
    ROUND_16_63 A B C D E F G H 0x9D8A7A87 64 284
    WORD_SCHEDULER_12_63 20
    ROUND_16_63 D A B C H E F G 0x3B14F50F 68 284
    WORD_SCHEDULER_12_63 24
    ROUND_16_63 C D A B G H E F 0x7629EA1E 72 284
    WORD_SCHEDULER_12_63 28
    ROUND_16_63 B C D A F G H E 0xEC53D43C 76 284
    WORD_SCHEDULER_12_63 32
    ROUND_16_63 A B C D E F G H 0xD8A7A879 80 284
    WORD_SCHEDULER_12_63 36
    ROUND_16_63 D A B C H E F G 0xB14F50F3 84 284
    WORD_SCHEDULER_12_63 40
    ROUND_16_63 C D A B G H E F 0x629EA1E7 88 284
    WORD_SCHEDULER_12_63 44
    ROUND_16_63 B C D A F G H E 0xC53D43CE 92 284
    WORD_SCHEDULER_12_63 48
    ROUND_16_63 A B C D E F G H 0x8A7A879D 96 284
    WORD_SCHEDULER_12_63 52
    ROUND_16_63 D A B C H E F G 0x14F50F3B 100 284
    WORD_SCHEDULER_12_63 56
    ROUND_16_63 C D A B G H E F 0x29EA1E76 104 284
    WORD_SCHEDULER_12_63 60
    ROUND_16_63 B C D A F G H E 0x53D43CEC 108 284
    WORD_SCHEDULER_12_63 64
    ROUND_16_63 A B C D E F G H 0xA7A879D8 112 284
    WORD_SCHEDULER_12_63 68
    ROUND_16_63 D A B C H E F G 0x4F50F3B1 116 284
    WORD_SCHEDULER_12_63 72
    ROUND_16_63 C D A B G H E F 0x9EA1E762 120 284
    WORD_SCHEDULER_12_63 76
    ROUND_16_63 B C D A F G H E 0x3D43CEC5 124 284
    WORD_SCHEDULER_12_63 80
    ROUND_16_63 A B C D E F G H 0x7A879D8A 128 284
    WORD_SCHEDULER_12_63 84
    ROUND_16_63 D A B C H E F G 0xF50F3B14 132 284
    WORD_SCHEDULER_12_63 88
    ROUND_16_63 C D A B G H E F 0xEA1E7629 136 284
    WORD_SCHEDULER_12_63 92
    ROUND_16_63 B C D A F G H E 0xD43CEC53 140 284
    WORD_SCHEDULER_12_63 96
    ROUND_16_63 A B C D E F G H 0xA879D8A7 144 284
    WORD_SCHEDULER_12_63 100
    ROUND_16_63 D A B C H E F G 0x50F3B14F 148 284
    WORD_SCHEDULER_12_63 104
    ROUND_16_63 C D A B G H E F 0xA1E7629E 152 284
    WORD_SCHEDULER_12_63 108
    ROUND_16_63 B C D A F G H E 0x43CEC53D 156 284
    WORD_SCHEDULER_12_63 112
    ROUND_16_63 A B C D E F G H 0x879D8A7A 160 284
    WORD_SCHEDULER_12_63 116
    ROUND_16_63 D A B C H E F G 0x0F3B14F5 164 284
    WORD_SCHEDULER_12_63 120
    ROUND_16_63 C D A B G H E F 0x1E7629EA 168 284
    WORD_SCHEDULER_12_63 124
    ROUND_16_63 B C D A F G H E 0x3CEC53D4 172 284
    WORD_SCHEDULER_12_63 128
    ROUND_16_63 A B C D E F G H 0x79D8A7A8 176 284
    WORD_SCHEDULER_12_63 132
    ROUND_16_63 D A B C H E F G 0xF3B14F50 180 284
    WORD_SCHEDULER_12_63 136
    ROUND_16_63 C D A B G H E F 0xE7629EA1 184 284
    WORD_SCHEDULER_12_63 140
    ROUND_16_63 B C D A F G H E 0xCEC53D43 188 284
    WORD_SCHEDULER_12_63 144
    ROUND_16_63 A B C D E F G H 0x9D8A7A87 192 284
    WORD_SCHEDULER_12_63 148
    ROUND_16_63 D A B C H E F G 0x3B14F50F 196 284
    WORD_SCHEDULER_12_63 152
    ROUND_16_63 C D A B G H E F 0x7629EA1E 200 284
    WORD_SCHEDULER_12_63 156
    ROUND_16_63 B C D A F G H E 0xEC53D43C 204 284
    WORD_SCHEDULER_12_63 160
    ROUND_16_63 A B C D E F G H 0xD8A7A879 208 284
    WORD_SCHEDULER_12_63 164
    ROUND_16_63 D A B C H E F G 0xB14F50F3 212 284
    WORD_SCHEDULER_12_63 168
    ROUND_16_63 C D A B G H E F 0x629EA1E7 216 284
    WORD_SCHEDULER_12_63 172
    ROUND_16_63 B C D A F G H E 0xC53D43CE 220 284
    WORD_SCHEDULER_12_63 176
    ROUND_16_63 A B C D E F G H 0x8A7A879D 224 284
    WORD_SCHEDULER_12_63 180
    ROUND_16_63 D A B C H E F G 0x14F50F3B 228 284
    WORD_SCHEDULER_12_63 184
    ROUND_16_63 C D A B G H E F 0x29EA1E76 232 284
    WORD_SCHEDULER_12_63 188
    ROUND_16_63 B C D A F G H E 0x53D43CEC 236 284
    WORD_SCHEDULER_12_63 192
    ROUND_16_63 A B C D E F G H 0xA7A879D8 240 284
    WORD_SCHEDULER_12_63 196
    ROUND_16_63 D A B C H E F G 0x4F50F3B1 244 284
    WORD_SCHEDULER_12_63 200
    ROUND_16_63 C D A B G H E F 0x9EA1E762 248 284
    WORD_SCHEDULER_12_63 204
    ROUND_16_63 B C D A F G H E 0x3D43CEC5 252 284

    /* Feed-forward: state <- state ^ (A..H) */
    xorl A,(STATE)
    xorl B,4(STATE)
    xorl C,8(STATE)
    xorl D,12(STATE)
    xorl E,16(STATE)
    xorl F,20(STATE)
    xorl G,24(STATE)
    xorl H,28(STATE)

    leaq 64(DATA),DATA
    decq NUM
    jnz .Lsm3_loop

.Lsm3_final:
    FRAME_LEAVE

.Lsm3_ret:
    ret
.size SM3_CompressAsm, .-SM3_CompressAsm

/*
 * vpshufb control vectors used by SM3_CompressSIMD, 16 bytes each.
 * Kept 64-byte aligned because they are loaded with vmovdqa.
 */
.section .rodata
.align 64
MASKS:
/* offset 0: reverse the bytes of every 32-bit word (big endian load) */
.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
/* offset 16: rotate every 32-bit word left by 16 bits */
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
/* offset 32: rotate every 32-bit word left by 24 bits */
.byte 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12

#endif

/* Mark the object as not requiring an executable stack. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",@progbits
#endif