/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#ifdef HITLS_CRYPTO_SM4

#include "crypt_sm4_modes_macro_x86_64.s"

.file "crypt_sm4_modes_x86_64.S"
.text
.extern g_cpuState
.hidden g_cpuState

.set X0,%ymm0
.set X1,%ymm1
.set X2,%ymm2
.set X3,%ymm3
.set Y0,%ymm4
.set Y1,%ymm5
.set Y2,%ymm6
.set Y3,%ymm7

.set ADDR,%rax
.set IN,%rdi
.set OUT,%rsi
.set LEN,%rdx
.set BLOCKS,%rdx
.set RK,%rcx
.set IV,%r8
.set TWEAK,%r8
.set TWEAK_MASK,%r9
.set ENC,%r9d
.set HI,%r12
.set LO,%r13
.set HI_TMP,%r14
.set LO_TMP,%r15

.set T0,%r10d
.set T0BL,%r10b
.set T1,%r11d

.set T0_64,%r10
.set T1_64,%r11

.set W0,%r12d
.set W1,%r13d
.set W2,%r14d
.set W3,%r15d

.macro LOAD_DATA
    vmovdqu (IN),X0
    vmovdqu 32(IN),X1
    vmovdqu 64(IN),X2
    vmovdqu 96(IN),X3
    vmovdqu 128(IN),Y0
    vmovdqu 128+32(IN),Y1
    vmovdqu 128+64(IN),Y2
    vmovdqu 128+96(IN),Y3
.endm

.macro XOR_DATA
    vpxor (IN),X0,X0
    vpxor 32(IN),X1,X1
    vpxor 64(IN),X2,X2
    vpxor 96(IN),X3,X3
    vpxor 128(IN),Y0,Y0
    vpxor 128+32(IN),Y1,Y1
    vpxor 128+64(IN),Y2,Y2
    vpxor 128+96(IN),Y3,Y3
.endm

.macro CHECK_GFNI re tmp
    xorl \re, \re

    movl $0x100, \tmp
    andl g_cpuState+24(%rip), \tmp    # get GFNI flag (bit 8)
    orl \tmp, \re

    movl $0x20, \tmp
    andl g_cpuState+20(%rip), \tmp    # get AVX2 flag (bit 5)
    orl \tmp, \re

    cmpl $0x120, \re                  # 0x120 = (1 << 8) | (1 << 5): both GFNI and AVX2 present
.endm

.macro SM4_CRYPT_GFNI_BLOCK16
    # Load affine matrices
    vpbroadcastq .PreAffinT(%rip),PreAffineTRegBLOCK16
    vpbroadcastq .PostAffinT(%rip),PostAffineTRegBLOCK16

    vmovdqa 32+4096(ADDR),TMP0
    # vmovdqa 64+4096(ADDR),AES_MASK
    # vmovdqa 96+4096(ADDR),AES_AND_MASK

    vpshufb TMP0,X0,X0
    vpshufb TMP0,X1,X1
    vpshufb TMP0,X2,X2
    vpshufb TMP0,X3,X3
    vpshufb TMP0,Y0,Y0
    vpshufb TMP0,Y1,Y1
    vpshufb TMP0,Y2,Y2
    vpshufb TMP0,Y3,Y3

    # Pack SIMD Vectors
    MATRIX_TRANSPOSE X0 X1 X2 X3
    MATRIX_TRANSPOSE Y0 Y1 Y2 Y3

    # AVX2 Rounds
    SM4_AVX2_GFNI_2_ROUNDS

    # Restore SIMD Vectors
    MATRIX_TRANSPOSE X0 X1 X2 X3
    MATRIX_TRANSPOSE Y0 Y1 Y2 Y3

    # Reverse Transformation
    vmovdqa 4096(ADDR),TMP0
    vpshufb TMP0,X0,X0
    vpshufb TMP0,X1,X1
    vpshufb TMP0,X2,X2
    vpshufb TMP0,X3,X3
    vpshufb TMP0,Y0,Y0
    vpshufb TMP0,Y1,Y1
    vpshufb TMP0,Y2,Y2
    vpshufb TMP0,Y3,Y3
.endm
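# Both 16-block kernels share the same shape: shuffle each block into the
# kernel's working byte order, transpose X0-X3 and Y0-Y3 so that each ymm
# register holds one 32-bit word lane of eight blocks, run the 32 SM4 rounds
# two at a time via the SM4_AVX2_*_2_ROUNDS macros (defined in the included
# crypt_sm4_modes_macro_x86_64.s), then transpose and shuffle back. They
# differ only in how the S-box is evaluated: the GFNI kernel uses GF(2^8)
# affine transforms, while the AESNI kernel appears to map the SM4 S-box
# onto the AES S-box using the AES_MASK/AES_AND_MASK constants.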
.macro SM4_CRYPT_AESNI_BLOCK16

    vmovdqa 32+4096(ADDR),TMP0
    vmovdqa 64+4096(ADDR),AES_MASK
    vmovdqa 96+4096(ADDR),AES_AND_MASK

    vpshufb TMP0,X0,X0
    vpshufb TMP0,X1,X1
    vpshufb TMP0,X2,X2
    vpshufb TMP0,X3,X3
    vpshufb TMP0,Y0,Y0
    vpshufb TMP0,Y1,Y1
    vpshufb TMP0,Y2,Y2
    vpshufb TMP0,Y3,Y3

    # Pack SIMD Vectors
    MATRIX_TRANSPOSE X0 X1 X2 X3
    MATRIX_TRANSPOSE Y0 Y1 Y2 Y3

    # AVX2 Rounds
    SM4_AVX2_AES_2_ROUNDS

    # Restore SIMD Vectors
    MATRIX_TRANSPOSE X0 X1 X2 X3
    MATRIX_TRANSPOSE Y0 Y1 Y2 Y3

    # Reverse Transformation
    vmovdqa 4096(ADDR),TMP0
    vpshufb TMP0,X0,X0
    vpshufb TMP0,X1,X1
    vpshufb TMP0,X2,X2
    vpshufb TMP0,X3,X3
    vpshufb TMP0,Y0,Y0
    vpshufb TMP0,Y1,Y1
    vpshufb TMP0,Y2,Y2
    vpshufb TMP0,Y3,Y3
.endm

.macro STORE_RESULTS
    vmovdqu X0,0(OUT)
    vmovdqu X1,32(OUT)
    vmovdqu X2,64(OUT)
    vmovdqu X3,96(OUT)
    vmovdqu Y0,128(OUT)
    vmovdqu Y1,128+32(OUT)
    vmovdqu Y2,128+64(OUT)
    vmovdqu Y3,128+96(OUT)
.endm

.macro CLEAR_CONTEXT
    xorl T0,T0
    xorl T1,T1
    xorl W0,W0
    xorl W1,W1
    xorl W2,W2
    xorl W3,W3
.endm

##### SM4-CBC #####
    # void SM4_CBC_Encrypt(const unsigned char *in, unsigned char *out, size_t len, const SM4_KEY *key, unsigned char *iv, const int enc)
    # in   %rdi
    # out  %rsi
    # len  %rdx
    # rk   %rcx
    # iv   %r8
    # enc  %r9d
    .globl SM4_CBC_Encrypt
    .type SM4_CBC_Encrypt, @function
    .align 64

SM4_CBC_Encrypt:

    # Store Registers
    subq $72,%rsp
    movq %rbx,(%rsp)
    movq %rbp,8(%rsp)
    movq %r9,16(%rsp)
    movq %r10,24(%rsp)
    movq %r11,32(%rsp)
    movq %r12,40(%rsp)
    movq %r13,48(%rsp)
    movq %r14,56(%rsp)
    movq %r15,64(%rsp)

    # Get Address
    leaq SBOX4X_MASK(%rip),ADDR

    testl ENC,ENC
    jz .Lcbc_decrypt

.Lcbc_encrypt:

    cmpq $16,LEN
    jl .Lcbc_ret

    # Load Data
    movl (IN),W0
    movl 4(IN),W1
    movl 8(IN),W2
    movl 12(IN),W3

    # XOR IV
    xorl (IV),W0
    xorl 4(IV),W1
    xorl 8(IV),W2
    xorl 12(IV),W3

    bswap W0
    bswap W1
    bswap W2
    bswap W3

    # Serial Rounds
    SM4_SERIAL_ROUNDS

    # Store Results
    bswap W0
    bswap W1
    bswap W2
    bswap W3

    movl W3,(OUT)
    movl W2,4(OUT)
    movl W1,8(OUT)
    movl W0,12(OUT)

    movl W3,(IV)
    movl W2,4(IV)
    movl W1,8(IV)
    movl W0,12(IV)

    leaq 16(IN),IN
    leaq 16(OUT),OUT
    subq $16,LEN

    jmp .Lcbc_encrypt

.Lcbc_decrypt:

    cmpq $256,LEN
    jl .Lcbc_dec

.Lcbc_dec16:

    LOAD_DATA
    CHECK_GFNI %r9d %r10d
    jl .Lcbc_dec_aesni
.Lcbc_dec_gfni:
    SM4_CRYPT_GFNI_BLOCK16
    jmp .Lafter_cbc_dec
.Lcbc_dec_aesni:
    SM4_CRYPT_AESNI_BLOCK16
.Lafter_cbc_dec:

    # Build [IV | C0] in TMP0 and save the last ciphertext block as the next IV
    vmovdqu (IV),TMP0x
    vmovdqu (IN),TMP1x
    vinserti128 $1,TMP1x,TMP0,TMP0
    vmovdqu 240(IN),TMP2x
    vmovdqu TMP2x,(IV)

    vpxor TMP0,X0,X0
    vpxor 16(IN),X1,X1
    vpxor 32+16(IN),X2,X2
    vpxor 64+16(IN),X3,X3
    vpxor 96+16(IN),Y0,Y0
    vpxor 128+16(IN),Y1,Y1
    vpxor 160+16(IN),Y2,Y2
    vpxor 192+16(IN),Y3,Y3

    STORE_RESULTS

    leaq 256(IN),IN
    leaq 256(OUT),OUT
    subq $256,LEN
    cmpq $256,LEN
    jl .Lcbc_dec16_ret
    jmp .Lcbc_dec16

.Lcbc_dec16_ret:

    vzeroall

.Lcbc_dec:

    cmpq $16,LEN
    jl .Lcbc_ret

    # Load Data
    movl (IN),W0
    movl 4(IN),W1
    movl 8(IN),W2
    movl 12(IN),W3

    bswap W0
    bswap W1
    bswap W2
    bswap W3

    # Serial Rounds
    SM4_SERIAL_ROUNDS

    # Store Result
    bswap W0
    bswap W1
    bswap W2
    bswap W3

    xorl (IV),W3
    xorl 4(IV),W2
    xorl 8(IV),W1
    xorl 12(IV),W0

    movq (IN),%r10
    movq %r10,(IV)
    movq 8(IN),%r10
    movq %r10,8(IV)

    movl W3,(OUT)
    movl W2,4(OUT)
    movl W1,8(OUT)
    movl W0,12(OUT)

    leaq 16(IN),IN
    leaq 16(OUT),OUT
    subq $16,LEN

    jmp .Lcbc_dec

.Lcbc_ret:

    CLEAR_CONTEXT

    # Restore Registers
    movq (%rsp),%rbx
    movq 8(%rsp),%rbp
    movq 16(%rsp),%rax
    movq 24(%rsp),%r10
    movq 32(%rsp),%r11
    movq 40(%rsp),%r12
    movq 48(%rsp),%r13
    movq 56(%rsp),%r14
    movq 64(%rsp),%r15
    addq $72,%rsp

    ret
    .size SM4_CBC_Encrypt, .-SM4_CBC_Encrypt
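# Reference semantics of the CBC paths above, as a hedged C sketch (comment
# only, not part of the build; SM4_Enc/SM4_Dec stand for one-block SM4):
#
#   /* encrypt: C[i] = SM4_Enc(P[i] ^ IV); IV = C[i];                     */
#   /* decrypt: P[i] = SM4_Dec(C[i]) ^ IV; IV = C[i]; the 16-block path   */
#   /* decrypts all 16 blocks in parallel, then XORs them against the     */
#   /* vector [IV, C0, ..., C14] and keeps C15 as the next IV.            */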
##### SM4-ECB #####
    # void SM4_ECB_Encrypt(const unsigned char *in, unsigned char *out, size_t len, const SM4_KEY *key)
    # in   %rdi
    # out  %rsi
    # len  %rdx
    # key  %rcx
    .globl SM4_ECB_Encrypt
    .type SM4_ECB_Encrypt, @function
    .align 64

SM4_ECB_Encrypt:

    # Store Registers
    subq $32,%rsp
    movq %r12,(%rsp)
    movq %r13,8(%rsp)
    movq %r14,16(%rsp)
    movq %r15,24(%rsp)

    # Get Address
    leaq SBOX4X_MASK(%rip),ADDR

.Lecb_encrypt:

    cmpq $256,LEN
    jl .Lecb_enc

.Lecb_enc16:

    LOAD_DATA

    CHECK_GFNI %r12d %r13d
    jl .Lecb_enc_aesni
.Lecb_enc_gfni:
    SM4_CRYPT_GFNI_BLOCK16
    jmp .Lafter_ecb_enc
.Lecb_enc_aesni:
    SM4_CRYPT_AESNI_BLOCK16
.Lafter_ecb_enc:
    STORE_RESULTS

    leaq 256(IN),IN
    leaq 256(OUT),OUT
    subq $256,LEN
    cmpq $256,LEN
    jl .Lecb_enc16_ret
    jmp .Lecb_enc16

.Lecb_enc16_ret:

    vzeroall

.Lecb_enc:

    cmpq $16,LEN
    jl .Lecb_ret

    # Load Data
    movl (IN),W0
    movl 4(IN),W1
    movl 8(IN),W2
    movl 12(IN),W3

    bswap W0
    bswap W1
    bswap W2
    bswap W3

    # Serial Rounds
    SM4_SERIAL_ROUNDS

    # Store Result
    bswap W0
    bswap W1
    bswap W2
    bswap W3

    movl W3,(OUT)
    movl W2,4(OUT)
    movl W1,8(OUT)
    movl W0,12(OUT)

    leaq 16(IN),IN
    leaq 16(OUT),OUT
    subq $16,LEN

    jmp .Lecb_enc

.Lecb_ret:

    CLEAR_CONTEXT

    # Restore Registers
    movq (%rsp),%r12
    movq 8(%rsp),%r13
    movq 16(%rsp),%r14
    movq 24(%rsp),%r15
    addq $32,%rsp

    ret
    .size SM4_ECB_Encrypt, .-SM4_ECB_Encrypt
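# CPU dispatch pattern shared by all bulk loops in this file, as a hedged C
# sketch (the g_cpuState word offsets below match what CHECK_GFNI reads and
# are an assumption of this sketch, not a documented layout):
#
#   /* uint32_t ebx7 = ((const uint32_t *)g_cpuState)[5]; // +20: CPUID.7 EBX? */
#   /* uint32_t ecx7 = ((const uint32_t *)g_cpuState)[6]; // +24: CPUID.7 ECX? */
#   /* if ((ebx7 & (1u << 5)) && (ecx7 & (1u << 8)))  -> GFNI 16-block kernel  */
#   /* else                                           -> AESNI 16-block kernel */
#   /* tails shorter than 16 blocks                   -> SM4_SERIAL_ROUNDS     */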
##### SM4-CFB ENC #####
    # void SM4_CFB128_Encrypt(const unsigned char *in, unsigned char *out, size_t len, const SM4_KEY *key, unsigned char *iv, int *num)
    # in   %rdi
    # out  %rsi
    # len  %rdx
    # rk   %rcx
    # iv   %r8
    # num  %r9d
    .globl SM4_CFB128_Encrypt
    .type SM4_CFB128_Encrypt, @function
    .align 64

SM4_CFB128_Encrypt:

    # Store Registers
    subq $72,%rsp
    movq %rbx,(%rsp)
    movq %rbp,8(%rsp)
    movq %r9,16(%rsp)
    movq %r10,24(%rsp)
    movq %r11,32(%rsp)
    movq %r12,40(%rsp)
    movq %r13,48(%rsp)
    movq %r14,56(%rsp)
    movq %r15,64(%rsp)

    # Load Num
    movl (%r9),%r9d
    cmpl $0,%r9d
    je .Lcfb128_enc_update

.Lcfb128_enc_init:

    movb 0(IV,%r9,1),%al
    xorb (IN),%al
    movb %al,(OUT)
    movb %al,0(IV,%r9,1)

    leaq 1(IN),IN
    leaq 1(OUT),OUT

    incl %r9d
    decq LEN
    cmpl $16,%r9d
    je .Lcfb128_enc_update
    cmpq $0,LEN
    je .Lcfb128_enc_ret

    jmp .Lcfb128_enc_init

.Lcfb128_enc_update:

    movl $0,%r9d

    # Get Address
    leaq SBOX4X_MASK(%rip),ADDR

.Lcfb128_enc_loop:

    cmpq $0,LEN
    je .Lcfb128_enc_ret

    movl $0,%r9d

    # Load IV
    movl (IV),W0
    movl 4(IV),W1
    movl 8(IV),W2
    movl 12(IV),W3

    bswap W0
    bswap W1
    bswap W2
    bswap W3

    # Serial Rounds
    SM4_SERIAL_ROUNDS

    # Store Results
    bswap W0
    bswap W1
    bswap W2
    bswap W3

    movl W3,(IV)
    movl W2,4(IV)
    movl W1,8(IV)
    movl W0,12(IV)

    cmpq $16,LEN
    jl .Lcfb128_enc_final

    xorl (IN),W3
    xorl 4(IN),W2
    xorl 8(IN),W1
    xorl 12(IN),W0

    movl W3,(OUT)
    movl W2,4(OUT)
    movl W1,8(OUT)
    movl W0,12(OUT)

    movl W3,(IV)
    movl W2,4(IV)
    movl W1,8(IV)
    movl W0,12(IV)

    leaq 16(IN),IN
    leaq 16(OUT),OUT
    subq $16,LEN

    jmp .Lcfb128_enc_loop

.Lcfb128_enc_final:

    movb 0(IV,%r9,1),%al
    xorb (IN),%al
    movb %al,(OUT)
    movb %al,0(IV,%r9,1)

    leaq 1(IN),IN
    leaq 1(OUT),OUT

    incl %r9d
    decq LEN
    jnz .Lcfb128_enc_final

.Lcfb128_enc_ret:

    CLEAR_CONTEXT

    # Restore Registers
    movq (%rsp),%rbx
    movq 8(%rsp),%rbp
    movq 16(%rsp),%rax
    movq 24(%rsp),%r10
    movq 32(%rsp),%r11
    movq 40(%rsp),%r12
    movq 48(%rsp),%r13
    movq 56(%rsp),%r14
    movq 64(%rsp),%r15
    addq $72,%rsp

    # Store Num
    movl %r9d,(%rax)

    ret
    .size SM4_CFB128_Encrypt, .-SM4_CFB128_Encrypt
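# Hedged C sketch of the CFB128 stream logic above (comment only): *num is
# the byte offset into the current keystream block, carried across calls.
#
#   /* full block:   K = SM4_Enc(IV); C = P ^ K; IV = C;                  */
#   /* partial tail: out[j] = in[j] ^ iv[num]; iv[num] = out[j]; num++;   */
#   /* encrypt feeds ciphertext bytes back into iv; decrypt (below) feeds */
#   /* the incoming ciphertext bytes back into iv instead.                */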
##### SM4-CFB DEC #####
    # void SM4_CFB128_Decrypt(const unsigned char *in, unsigned char *out, size_t len, const SM4_KEY *key, unsigned char *iv, int *num)
    # in   %rdi
    # out  %rsi
    # len  %rdx
    # rk   %rcx
    # iv   %r8
    # num  %r9d
    .globl SM4_CFB128_Decrypt
    .type SM4_CFB128_Decrypt, @function
    .align 64

SM4_CFB128_Decrypt:

    # Store Registers
    subq $72,%rsp
    movq %rbx,(%rsp)
    movq %rbp,8(%rsp)
    movq %r9,16(%rsp)
    movq %r10,24(%rsp)
    movq %r11,32(%rsp)
    movq %r12,40(%rsp)
    movq %r13,48(%rsp)
    movq %r14,56(%rsp)
    movq %r15,64(%rsp)

    # Load Num
    movl (%r9),%r9d
    cmpl $0,%r9d
    je .Lcfb128_dec_update

.Lcfb128_dec_init:

    movb 0(IV,%r9,1),%al
    movb (IN),%bl
    xorb %bl,%al
    movb %al,(OUT)
    movb %bl,0(IV,%r9,1)

    leaq 1(IN),IN
    leaq 1(OUT),OUT

    incl %r9d
    decq LEN
    cmpl $16,%r9d
    je .Lcfb128_dec_update
    cmpq $0,LEN
    je .Lcfb128_dec_ret

    jmp .Lcfb128_dec_init

.Lcfb128_dec_update:

    # Get Address
    leaq SBOX4X_MASK(%rip),ADDR

    movl $0,%r9d

    cmpq $256,LEN
    jl .Lcfb128_dec

.Lcfb128_dec16:

    # Encrypt [IV, C0, ..., C14] as the keystream; save C15 as the next IV
    vmovdqu (IV),TMP0x
    vmovdqu (IN),TMP1x
    vinserti128 $1,TMP1x,TMP0,TMP0
    vmovdqu 240(IN),TMP2x
    vmovdqu TMP2x,(IV)

    vmovdqu TMP0,X0
    vmovdqu 16(IN),X1
    vmovdqu 32+16(IN),X2
    vmovdqu 64+16(IN),X3
    vmovdqu 96+16(IN),Y0
    vmovdqu 128+16(IN),Y1
    vmovdqu 160+16(IN),Y2
    vmovdqu 192+16(IN),Y3

    CHECK_GFNI %r10d %r11d
    jl .Lcfb128_dec_aesni
.Lcfb128_dec_gfni:
    SM4_CRYPT_GFNI_BLOCK16
    jmp .Lafter_cfb128_dec
.Lcfb128_dec_aesni:
    SM4_CRYPT_AESNI_BLOCK16
.Lafter_cfb128_dec:
    XOR_DATA
    STORE_RESULTS

    leaq 256(IN),IN
    leaq 256(OUT),OUT
    subq $256,LEN
    cmpq $256,LEN
    jl .Lcfb128_dec16_ret
    jmp .Lcfb128_dec16

.Lcfb128_dec16_ret:

    vzeroall

.Lcfb128_dec:

    cmpq $0,LEN
    je .Lcfb128_dec_ret

.Lcfb128_dec1:

    # Load IV
    movl (IV),W0
    movl 4(IV),W1
    movl 8(IV),W2
    movl 12(IV),W3

    bswap W0
    bswap W1
    bswap W2
    bswap W3

    # Serial Rounds
    SM4_SERIAL_ROUNDS

    # Store Results
    bswap W0
    bswap W1
    bswap W2
    bswap W3

    movl W3,(IV)
    movl W2,4(IV)
    movl W1,8(IV)
    movl W0,12(IV)

    cmpq $16,LEN
    jl .Lcfb128_dec_final

    movq (IN),%rbx
    movq %rbx,(IV)
    movq 8(IN),%rbx
    movq %rbx,8(IV)
    xorq %rbx,%rbx

    xorl (IN),W3
    xorl 4(IN),W2
    xorl 8(IN),W1
    xorl 12(IN),W0

    movl W3,(OUT)
    movl W2,4(OUT)
    movl W1,8(OUT)
    movl W0,12(OUT)

    leaq 16(IN),IN
    leaq 16(OUT),OUT
    subq $16,LEN
    cmpq $0,LEN
    je .Lcfb128_dec_ret
    jmp .Lcfb128_dec1

.Lcfb128_dec_final:

    movb 0(IV,%r9,1),%al
    movb (IN),%bl
    xorb %bl,%al
    movb %al,(OUT)
    movb %bl,0(IV,%r9,1)

    leaq 1(IN),IN
    leaq 1(OUT),OUT

    incl %r9d
    decq LEN
    jnz .Lcfb128_dec_final

.Lcfb128_dec_ret:

    CLEAR_CONTEXT

    # Restore Registers
    movq (%rsp),%rbx
    movq 8(%rsp),%rbp
    movq 16(%rsp),%rax
    movq 24(%rsp),%r10
    movq 32(%rsp),%r11
    movq 40(%rsp),%r12
    movq 48(%rsp),%r13
    movq 56(%rsp),%r14
    movq 64(%rsp),%r15
    addq $72,%rsp

    # Store Num
    movl %r9d,(%rax)

    ret
    .size SM4_CFB128_Decrypt, .-SM4_CFB128_Decrypt
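# The OFB mode below reuses the same byte-granular *num handling as CFB, but
# the keystream depends only on the IV chain: the IV is replaced by SM4(IV)
# for each block and is never mixed with plaintext or ciphertext, so OFB
# encryption and decryption are the same operation.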
##### SM4-OFB #####
    # void SM4_OFB_Encrypt(const unsigned char *in, unsigned char *out, size_t len, const SM4_KEY *key, unsigned char *iv, int *num)
    # in   %rdi
    # out  %rsi
    # len  %rdx
    # rk   %rcx
    # iv   %r8
    # num  %r9d
    .globl SM4_OFB_Encrypt
    .type SM4_OFB_Encrypt, @function
    .align 64

SM4_OFB_Encrypt:

    # Store Registers
    subq $72,%rsp
    movq %rbx,(%rsp)
    movq %rbp,8(%rsp)
    movq %r9,16(%rsp)
    movq %r10,24(%rsp)
    movq %r11,32(%rsp)
    movq %r12,40(%rsp)
    movq %r13,48(%rsp)
    movq %r14,56(%rsp)
    movq %r15,64(%rsp)

    # Load Num
    movl (%r9),%r9d
    cmpl $0,%r9d
    jz .Lofb128_enc_update

.Lofb128_enc_init:

    movb 0(IV,%r9,1),%al
    xorb (IN),%al
    movb %al,(OUT)

    leaq 1(IN),IN
    leaq 1(OUT),OUT

    incl %r9d
    decq LEN
    cmpl $16,%r9d
    je .Lofb128_enc_update
    cmpq $0,LEN
    je .Lofb128_enc_ret

    jmp .Lofb128_enc_init

.Lofb128_enc_update:

    movl $0,%r9d

    # Get Address
    leaq SBOX4X_MASK(%rip),ADDR

.Lofb128_enc_loop:

    cmpq $0,LEN
    je .Lofb128_enc_ret

    # Load IV
    movl (IV),W0
    movl 4(IV),W1
    movl 8(IV),W2
    movl 12(IV),W3

    bswap W0
    bswap W1
    bswap W2
    bswap W3

    # Serial Rounds
    SM4_SERIAL_ROUNDS

    # Store Results
    bswap W0
    bswap W1
    bswap W2
    bswap W3

    movl W3,(IV)
    movl W2,4(IV)
    movl W1,8(IV)
    movl W0,12(IV)

    cmpq $16,LEN
    jl .Lofb128_enc_final

    xorl (IN),W3
    xorl 4(IN),W2
    xorl 8(IN),W1
    xorl 12(IN),W0

    movl W3,(OUT)
    movl W2,4(OUT)
    movl W1,8(OUT)
    movl W0,12(OUT)

    leaq 16(IN),IN
    leaq 16(OUT),OUT
    subq $16,LEN

    jmp .Lofb128_enc_loop

.Lofb128_enc_final:

    movb 0(IV,%r9,1),%al
    xorb (IN),%al
    movb %al,(OUT)

    leaq 1(IN),IN
    leaq 1(OUT),OUT

    incl %r9d
    decq LEN
    jnz .Lofb128_enc_final

.Lofb128_enc_ret:

    CLEAR_CONTEXT

    # Restore Registers
    movq (%rsp),%rbx
    movq 8(%rsp),%rbp
    movq 16(%rsp),%rax
    movq 24(%rsp),%r10
    movq 32(%rsp),%r11
    movq 40(%rsp),%r12
    movq 48(%rsp),%r13
    movq 56(%rsp),%r14
    movq 64(%rsp),%r15
    addq $72,%rsp

    # Store Num
    movl %r9d,(%rax)

    ret
    .size SM4_OFB_Encrypt, .-SM4_OFB_Encrypt

##### SM4-CTR32 #####
# NOTE: the IV/counter in CTR mode is big-endian.
.align 64
.Lmovbe12:
# Shuffle mask: byte-swap only the 32-bit counter word (bytes 12-15) of each 128-bit lane
.byte 0,1,2,3,4,5,6,7,8,9,10,11,15,14,13,12,0,1,2,3,4,5,6,7,8,9,10,11,15,14,13,12
.Lone:
# Adds 1 to the (byte-swapped) counter dword
.long 0,0,0,1

.macro INCREMENT_COUNTER
    movbe 12(IV),%ebx
    incl %ebx
    movbe %ebx,12(IV)
.endm

# Produce two consecutive counter blocks in \SINK and advance TMP2x by two
.macro LOAD_ECOUNT_BUF SINK
    vpaddd TMP1x,TMP2x,TMP3x
    vpaddd TMP1x,TMP3x,TMP4x
    vinserti128 $1,TMP3x,TMP2,TMP2
    vpshufb TMP0,TMP2,TMP2
    vmovdqa TMP2,\SINK
    vmovdqa TMP4x,TMP2x
.endm

.macro LOAD_ECOUNT_BUF_ALL
    vmovdqa .Lmovbe12(%rip),TMP0
    vmovdqa .Lone(%rip),TMP1x
    vmovdqu (IV),TMP2x
    vpshufb TMP0x,TMP2x,TMP2x
    LOAD_ECOUNT_BUF X0
    LOAD_ECOUNT_BUF X1
    LOAD_ECOUNT_BUF X2
    LOAD_ECOUNT_BUF X3
    LOAD_ECOUNT_BUF Y0
    LOAD_ECOUNT_BUF Y1
    LOAD_ECOUNT_BUF Y2
    LOAD_ECOUNT_BUF Y3
    vpshufb TMP0x,TMP2x,TMP2x
    vmovdqu TMP2x,(IV)
.endm

    # void SM4_CTR_EncryptBlocks(const unsigned char *in, unsigned char *out, size_t blocks, const SM4_KEY *key, const unsigned char *iv)
    # in      %rdi
    # out     %rsi
    # blocks  %rdx
    # rk      %rcx
    # iv      %r8
    .globl SM4_CTR_EncryptBlocks
    .type SM4_CTR_EncryptBlocks, @function
    .align 64

SM4_CTR_EncryptBlocks:

    # Get Address
    leaq SBOX4X_MASK(%rip),ADDR

    # Store Registers
    subq $88,%rsp
    movq %rbx,(%rsp)
    movq %rbp,8(%rsp)
    movq %r8,16(%rsp)
    movq %r9,24(%rsp)
    movq %r10,32(%rsp)
    movq %r11,40(%rsp)
    movq %r12,48(%rsp)
    movq %r13,56(%rsp)
    movq %r14,64(%rsp)
    movq %r15,72(%rsp)
    movq %rdx,80(%rsp)

    cmpq $16,BLOCKS
    jl .Lctr32_enc

.Lctr32_enc16:

    LOAD_ECOUNT_BUF_ALL
    CHECK_GFNI %r9d %r10d
    jl .Lctr32_enc_aesni
.Lctr32_enc_gfni:
    SM4_CRYPT_GFNI_BLOCK16
    jmp .Lafter_ctr32_enc
.Lctr32_enc_aesni:
    SM4_CRYPT_AESNI_BLOCK16
.Lafter_ctr32_enc:

    XOR_DATA
    STORE_RESULTS

    leaq 256(IN),IN
    leaq 256(OUT),OUT
    subq $16,BLOCKS
    cmpq $16,BLOCKS
    jl .Lctr32_enc16_ret
    jmp .Lctr32_enc16

.Lctr32_enc16_ret:

    vzeroall

.Lctr32_enc:

    cmpq $0,BLOCKS
    je .Lctr32_ret

    # Load IV
    movl (IV),W0
    movl 4(IV),W1
    movl 8(IV),W2
    movl 12(IV),W3

    bswap W0
    bswap W1
    bswap W2
    bswap W3

    # Serial Rounds
    SM4_SERIAL_ROUNDS

    # Store Results
    bswap W0
    bswap W1
    bswap W2
    bswap W3

    xorl (IN),W3
    xorl 4(IN),W2
    xorl 8(IN),W1
    xorl 12(IN),W0

    movl W3,(OUT)
    movl W2,4(OUT)
    movl W1,8(OUT)
    movl W0,12(OUT)

    leaq 16(IN),IN
    leaq 16(OUT),OUT
    decq BLOCKS

    INCREMENT_COUNTER

    jmp .Lctr32_enc

.Lctr32_ret:

    CLEAR_CONTEXT

    # Restore Registers
    movq (%rsp),%rbx
    movq 8(%rsp),%rbp
    movq 16(%rsp),%r8
    movq 24(%rsp),%r9
    movq 32(%rsp),%r10
    movq 40(%rsp),%r11
    movq 48(%rsp),%r12
    movq 56(%rsp),%r13
    movq 64(%rsp),%r14
    movq 72(%rsp),%r15
    movq 80(%rsp),%rdx
    addq $88,%rsp

    ret
    .size SM4_CTR_EncryptBlocks, .-SM4_CTR_EncryptBlocks

##### SM4-XTS #####

.align 16
.Lxts_tweak_mask:
# 0xe100000000000000: reduction constant for x^128 + x^7 + x^2 + x + 1 (bit-reflected)
.long 0,0xe1000000

.macro GALOIS_FIELD_MUL Idx
    xorq LO_TMP,LO_TMP

    testq $1,LO
    cmovnzq TWEAK_MASK,LO_TMP
    shrd $1,HI,LO
    shrq $1,HI
    xorq LO_TMP,HI

    movbe HI,\Idx(TWEAK)
    movbe LO,\Idx+8(TWEAK)
.endm
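# Hedged C sketch of GALOIS_FIELD_MUL (comment only): one multiplication of
# the running tweak by x in GF(2^128) mod x^128 + x^7 + x^2 + x + 1, in the
# bit-reflected representation also used by GHASH (hence the right shift and
# the 0xE1 reduction constant rather than a left-shift/0x87 form). HI/LO are
# the movbe-loaded (big-endian) halves and stay live across calls, so each
# invocation derives T(i+1) from T(i) and stores it at the given offset.
#
#   /* uint64_t r = (lo & 1) ? 0xe100000000000000ull : 0;   */
#   /* lo = (lo >> 1) | (hi << 63);                         */
#   /* hi = (hi >> 1) ^ r;                                  */
#   /* store_be64(tweak + idx, hi);                         */
#   /* store_be64(tweak + idx + 8, lo);                     */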
.macro GALOIS_FIELD_MUL_16_INNER

    # T1:T0->T1
    GALOIS_FIELD_MUL 16
    # T2:T1->T2
    GALOIS_FIELD_MUL 32
    # T3:T2->T3
    GALOIS_FIELD_MUL 48
    # T4:T3->T4
    GALOIS_FIELD_MUL 64
    # T5:T4->T5
    GALOIS_FIELD_MUL 80
    # T6:T5->T6
    GALOIS_FIELD_MUL 96
    # T7:T6->T7
    GALOIS_FIELD_MUL 112
    # T8:T7->T8
    GALOIS_FIELD_MUL 128
    # T9:T8->T9
    GALOIS_FIELD_MUL 144
    # T10:T9->T10
    GALOIS_FIELD_MUL 160
    # T11:T10->T11
    GALOIS_FIELD_MUL 176
    # T12:T11->T12
    GALOIS_FIELD_MUL 192
    # T13:T12->T13
    GALOIS_FIELD_MUL 208
    # T14:T13->T14
    GALOIS_FIELD_MUL 224
    # T15:T14->T15
    GALOIS_FIELD_MUL 240
.endm

.macro XOR_TWEAK
    vpxor (TWEAK),X0,X0
    vpxor 32(TWEAK),X1,X1
    vpxor 64(TWEAK),X2,X2
    vpxor 96(TWEAK),X3,X3
    vpxor 128(TWEAK),Y0,Y0
    vpxor 128+32(TWEAK),Y1,Y1
    vpxor 128+64(TWEAK),Y2,Y2
    vpxor 128+96(TWEAK),Y3,Y3
.endm

.macro SM4_XTS_16_EN_INNER
    LOAD_DATA
    XOR_TWEAK
    CHECK_GFNI %r15d %r14d
    jl .Lxts_enc_aesni
.Lxts_enc_gfni:
    SM4_CRYPT_GFNI_BLOCK16
    jmp .Lafter_xts_enc
.Lxts_enc_aesni:
    SM4_CRYPT_AESNI_BLOCK16
.Lafter_xts_enc:
    XOR_TWEAK
    STORE_RESULTS
.endm

    # void SM4_XTS_Encrypt_Blocks(const unsigned char *in, unsigned char *out, size_t len, const SM4_KEY *key, unsigned char *t)
    # in   %rdi
    # out  %rsi
    # len  %rdx
    # key  %rcx
    # t    %r8
    .globl SM4_XTS_Encrypt_Blocks
    .type SM4_XTS_Encrypt_Blocks, @function
    .align 64

SM4_XTS_Encrypt_Blocks:

    cmpq $256,LEN
    jl .Lxts_ret

    # Store Registers
    subq $56,%rsp
    movq %r9,(%rsp)
    movq %r10,8(%rsp)
    movq %r11,16(%rsp)
    movq %r12,24(%rsp)
    movq %r13,32(%rsp)
    movq %r14,40(%rsp)
    movq %r15,48(%rsp)

    # Get Address
    leaq SBOX4X_MASK(%rip),ADDR

    # Load tweak mask
    movq .Lxts_tweak_mask(%rip),TWEAK_MASK

    # T0: Initial
    movbe (TWEAK),HI
    movbe 8(TWEAK),LO

.Lxts_update:

    GALOIS_FIELD_MUL_16_INNER
    SM4_XTS_16_EN_INNER

    leaq 256(IN),IN
    leaq 256(OUT),OUT
    subq $256,LEN
    cmpq $256,LEN
    jl .Lxts_final

    # T15: Initial
    movbe 240(TWEAK),HI
    movbe 248(TWEAK),LO
    # T0:T15->T0
    GALOIS_FIELD_MUL 0

    jmp .Lxts_update

.Lxts_final:

    # Clear Context
    vzeroall

    # Restore Registers
    movq (%rsp),%r9
    movq 8(%rsp),%r10
    movq 16(%rsp),%r11
    movq 24(%rsp),%r12
    movq 32(%rsp),%r13
    movq 40(%rsp),%r14
    movq 48(%rsp),%r15
    addq $56,%rsp

.Lxts_ret:

    ret
    .size SM4_XTS_Encrypt_Blocks, .-SM4_XTS_Encrypt_Blocks

#endif