/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CTR)

/*
 * Syntax: GNU as, AT&T operand order (source, destination).
 * ABI:    System V AMD64 (arguments in rdi, rsi, rdx, rcx, r8; r12-r15 are callee-saved).
 */
.file   "crypt_aes_ctr_x86_64.S"
.text

/* Function arguments. */
.set    KEY, %rdi
.set    INPUT, %rsi
.set    OUTPUT, %rdx
.set    LEN, %ecx
.set    CTR_IV, %r8

/* Round-key registers and scalar temporaries. ROUNDS and RET share eax. */
.set    RDK, %xmm0
.set    RDK2, %xmm1
.set    KTMP, %r13
.set    ROUNDS, %eax
.set    RET, %eax

/* Counter blocks being encrypted (the key stream). */
.set    IV0, %xmm2
.set    IV1, %xmm3
.set    IV2, %xmm4
.set    IV3, %xmm5
.set    IV4, %xmm6
.set    IV5, %xmm7
.set    IV6, %xmm8
.set    IV7, %xmm9

/* Data blocks (input XOR key stream). */
.set    BLK0, %xmm10
.set    BLK1, %xmm11
.set    BLK2, %xmm12
.set    BLK3, %xmm13
.set    BLK4, %xmm14
.set    BLK5, %xmm15

/**
 * Macro description: Applies one AES round (aesenc) with the same round key to eight blocks.
 * Input register:
 *      key:       Round key.
 *      block0-7:  Blocks to be encrypted.
 * Modify the register: block0-7.
 * Output register:
 *      block0-7:  Blocks after one round of encryption.
 */
.macro ONE_ENC key block0 block1 block2 block3 block4 block5 block6 block7
    aesenc  \key, \block0
    aesenc  \key, \block1
    aesenc  \key, \block2
    aesenc  \key, \block3
    aesenc  \key, \block4
    aesenc  \key, \block5
    aesenc  \key, \block6
    aesenc  \key, \block7
.endm

/**
 * Macro description: Computes the counter value ctr32 + offset, byte-swaps it, XORs it with the
 *                    32-bit round-key word and stores the result at addrOffset+12(addr).
 * Input register:
 *      ctr32:      32-bit counter (already byte-swapped by the caller).
 *      offset:     Value added to the counter.
 *      temp:       32-bit temporary register.
 *      key32:      32-bit round-key word.
 *      addrOffset: Offset of the 16-byte slot inside the scratch area.
 *      addr:       Base address of the scratch area.
 * Modify the register: temp. Flags are modified by xorl.
 */
.macro XOR_KEY ctr32 offset temp key32 addrOffset addr
    leal    \offset(\ctr32), \temp          // temp = ctr32 + offset
    bswapl  \temp
    xorl    \key32, \temp
    movl    \temp, \addrOffset+12(\addr)    // Patch the last 4 bytes of the 16-byte slot.
.endm

/**
 * Macro description: Loads one round key, applies one AES round to IV0-7 and, interleaved with the
 *                    aesenc instructions, prepares one counter word for the next iteration
 *                    (same computation as XOR_KEY).
 * Input register:
 *      key:        Pointer to the key.
 *      offset:     Round-key offset.
 *      temp:       Temporary register for the round key.
 *      ctr32:      32-bit counter.
 *      offset2:    Value added to the counter.
 *      temp2:      32-bit counter temporary register.
 *      key32:      32-bit round-key word.
 *      addrOffset: Offset of the 16-byte slot inside the scratch area.
 *      addr:       Base address of the scratch area.
 * Modify the register: temp, temp2, IV0-7. Flags are modified by xorl.
 * Output register:
 *      IV0-7: Blocks after one round of encryption.
 */
.macro ONE_ENC_XOR_KEY key offset temp ctr32 offset2 temp2 key32 addrOffset addr
    vmovdqu \offset(\key), \temp
    aesenc  \temp, IV0
    leal    \offset2(\ctr32), \temp2        // temp2 = ctr32 + offset2
    aesenc  \temp, IV1
    bswapl  \temp2
    aesenc  \temp, IV2
    aesenc  \temp, IV3
    xorl    \key32, \temp2
    aesenc  \temp, IV4
    aesenc  \temp, IV5
    movl    \temp2, \addrOffset+12(\addr)   // Patch the last 4 bytes of the 16-byte slot.
    aesenc  \temp, IV6
    aesenc  \temp, IV7
.endm

/**
 * Macro description: Advances the input and output pointers and reduces the remaining length.
 * Input register:
 *      input:  Pointer to the input memory.
 *      output: Pointer to the output memory.
 *      len:    Remaining data length.
 *      offset: Number of bytes consumed (immediate).
 * Modify the register: input, output, len. Flags are set by the final subl.
 * Output register:
 *      input, output, len
 */
.macro UPDATE_DATA input output len offset
    leaq    \offset(\input), \input
    leaq    \offset(\output), \output
    subl    $\offset, \len
.endm

/**
 * Function description: AES-CTR processing with AES-NI. The key stream is produced by encrypting
 *                       successive counter blocks and is XORed with the input.
 * Function prototype: int32_t CRYPT_AES_CTR_Encrypt(const CRYPT_AES_Key *ctx, const uint8_t *in, uint8_t *out,
 *                                                  uint32_t len, uint8_t *iv);
 * Input register:
 *      rdi: Pointer to the key structure. Round keys are read from offset 0 (16 bytes each) and the
 *           round count is read from offset 240.
 *      rsi: Pointer to the input data.
 *      rdx: Pointer to the output data.
 *      ecx: Data length in bytes. Only whole 16-byte blocks are processed; the low four bits of the
 *           length are ignored, so the caller must handle any trailing partial block.
 *      r8:  Pointer to the 16-byte counter block. Bytes 12..15 are used as a big-endian 32-bit counter.
 *           Only this word is incremented; no carry is propagated into bytes 0..11.
 * Processing order: (len / 16) % 8 leading blocks first, then groups of eight blocks.
 * Stack usage: 128 bytes of 16-byte-aligned scratch space (eight pre-whitened counter blocks),
 *              wiped before returning.
 * Change register: rax, rcx, rdx, rsi, r9, r10, r11, xmm0-xmm15, flags.
 *                  r12-r15 are used but saved and restored.
 * Output: The output buffer, and bytes 12..15 of the counter block (advanced by the number of blocks
 *         processed). eax is always 0.
 */
.globl CRYPT_AES_CTR_Encrypt
    .type CRYPT_AES_CTR_Encrypt, @function
CRYPT_AES_CTR_Encrypt:
    .cfi_startproc
    pushq   %r12
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r12, -16
    pushq   %r13
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r13, -24
    pushq   %r14
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r14, -32
    pushq   %r15
    .cfi_adjust_cfa_offset 8
    .cfi_offset %r15, -40
    movq    %rsp, %r12                      // r12 keeps the unaligned rsp for the epilogue.
    .cfi_def_cfa_register %r12
    subq    $128, %rsp                      // Reserve 128 bytes of scratch space.
    andq    $-16, %rsp                      // Align it so that vmovdqa can be used.

    andl    $-16, LEN                       // Whole blocks only: a partial block would overrun the buffers.

    vmovdqu (KEY), RDK                      // Round 0 key.
    vpxor   (CTR_IV), RDK, IV0              // IV0 = counter block ^ round 0 key.
    vmovdqa IV0, 0(%rsp)                    // Fill all eight slots; only bytes 12..15 differ per block.
    vmovdqa IV0, 16(%rsp)
    vmovdqa IV0, 32(%rsp)
    vmovdqa IV0, 48(%rsp)
    vmovdqa IV0, 64(%rsp)
    vmovdqa IV0, 80(%rsp)
    vmovdqa IV0, 96(%rsp)
    vmovdqa IV0, 112(%rsp)

    movl    12(CTR_IV), %r11d               // Read 32-bit ctr.
    movl    12(KEY), %r9d                   // Read the matching 32-bit word of the round 0 key.
    bswap   %r11d                           // r11d = counter in host byte order.

    mov     LEN, %r14d
    shr     $4, %r14d
    and     $7, %r14d                       // r14d = (len / 16) % 8, the number of leading blocks.
    cmp     $1, %r14d
    je      .Lctr_enc_proc_1_blk
    cmp     $2, %r14d
    je      .Lctr_enc_proc_2_blk
    cmp     $3, %r14d
    je      .Lctr_enc_proc_3_blk
    cmp     $4, %r14d
    je      .Lctr_enc_proc_4_blk
    cmp     $5, %r14d
    je      .Lctr_enc_proc_5_blk
    cmp     $6, %r14d
    je      .Lctr_enc_proc_6_blk
    cmp     $7, %r14d
    je      .Lctr_enc_proc_7_blk

.Lctr_enc_proc_8_blk:
    testl   LEN, LEN                        // Here LEN is a multiple of 128.
    jz      .Lctr_aesenc_finish

    // Build the eight counter words ctr+0 .. ctr+7 (byte-swapped, XORed with the key word)
    // and patch them into the scratch slots.
    leal    0(%r11d), %r15d
    leal    1(%r11d), %r10d
    bswapl  %r15d
    bswapl  %r10d
    xorl    %r9d, %r15d
    xorl    %r9d, %r10d
    leal    2(%r11d), %r14d
    movl    %r15d, 12(%rsp)
    bswapl  %r14d
    movl    %r10d, 16+12(%rsp)
    xorl    %r9d, %r14d
    leal    3(%r11d), %r15d
    leal    4(%r11d), %r10d
    bswapl  %r15d
    bswapl  %r10d
    movl    %r14d, 32+12(%rsp)
    xorl    %r9d, %r15d
    xorl    %r9d, %r10d
    movl    %r15d, 48+12(%rsp)
    leal    5(%r11d), %r14d
    bswapl  %r14d
    movl    %r10d, 64+12(%rsp)
    xorl    %r9d, %r14d
    leal    6(%r11d), %r15d
    leal    7(%r11d), %r10d
    movl    %r14d, 80+12(%rsp)
    bswapl  %r15d
    bswapl  %r10d
    xorl    %r9d, %r15d
    xorl    %r9d, %r10d
    movl    %r15d, 96+12(%rsp)
    movl    %r10d, 112+12(%rsp)

    vmovdqa (%rsp), IV0
    vmovdqa 16(%rsp), IV1
    vmovdqa 32(%rsp), IV2
    vmovdqa 48(%rsp), IV3
    vmovdqa 64(%rsp), IV4
    vmovdqa 80(%rsp), IV5
    vmovdqa 96(%rsp), IV6
    vmovdqa 112(%rsp), IV7
.align 16
.Lctr_aesenc_8_blks_enc_loop:
    addl    $8, %r11d                       // ctr+8
    movl    240(KEY), ROUNDS
    // Rounds 1-8; each one also prepares one counter word of the next group of eight blocks.
    ONE_ENC_XOR_KEY KEY, 16, RDK2, %r11d, 0, %r10d, %r9d, 0, %rsp       // Round 1 encryption
    ONE_ENC_XOR_KEY KEY, 32, RDK2, %r11d, 1, %r10d, %r9d, 16, %rsp      // Round 2 encryption
    ONE_ENC_XOR_KEY KEY, 48, RDK2, %r11d, 2, %r10d, %r9d, 32, %rsp      // Round 3 encryption
    ONE_ENC_XOR_KEY KEY, 64, RDK2, %r11d, 3, %r10d, %r9d, 48, %rsp      // Round 4 encryption
    ONE_ENC_XOR_KEY KEY, 80, RDK2, %r11d, 4, %r10d, %r9d, 64, %rsp      // Round 5 encryption
    ONE_ENC_XOR_KEY KEY, 96, RDK2, %r11d, 5, %r10d, %r9d, 80, %rsp      // Round 6 encryption
    ONE_ENC_XOR_KEY KEY, 112, RDK2, %r11d, 6, %r10d, %r9d, 96, %rsp     // Round 7 encryption
    ONE_ENC_XOR_KEY KEY, 128, RDK2, %r11d, 7, %r10d, %r9d, 112, %rsp    // Round 8 encryption

    vmovdqu 144(KEY), RDK                   // Round 9 key Load
    vmovdqu 160(KEY), RDK2                  // Round 10 key Load
    cmp     $12, ROUNDS                     // Flags stay valid below: aesenc and vmovdqu do not touch them.
    jb      .Lctr_aesenc_8_blks_enc_last    // Fewer than 12 rounds.

    ONE_ENC RDK, IV0, IV1, IV2, IV3, IV4, IV5, IV6, IV7     // Round 9 encryption
    vmovdqu 176(KEY), RDK                   // Round 11 key Load
    ONE_ENC RDK2, IV0, IV1, IV2, IV3, IV4, IV5, IV6, IV7    // Round 10 encryption
    vmovdqu 192(KEY), RDK2                  // Round 12 key Load

    je      .Lctr_aesenc_8_blks_enc_last    // Exactly 12 rounds (flags from the cmp above).

    ONE_ENC RDK, IV0, IV1, IV2, IV3, IV4, IV5, IV6, IV7     // Round 11 encryption
    vmovdqu 208(KEY), RDK                   // Round 13 key Load
    ONE_ENC RDK2, IV0, IV1, IV2, IV3, IV4, IV5, IV6, IV7    // Round 12 encryption
    vmovdqu 224(KEY), RDK2                  // Round 14 key Load

.align 16
.Lctr_aesenc_8_blks_enc_last:
    // RDK = key of the second-to-last round, RDK2 = key of the last round.
    vpxor   (INPUT), RDK2, BLK0             // Last round Key ^ Plaintext.
    vpxor   16(INPUT), RDK2, BLK1
    vpxor   32(INPUT), RDK2, BLK2
    vpxor   48(INPUT), RDK2, BLK3

    ONE_ENC RDK, IV0, IV1, IV2, IV3, IV4, IV5, IV6, IV7

    aesenclast BLK0, IV0                    // Last round; for blocks 0-3 the input is folded into the key.
    aesenclast BLK1, IV1
    aesenclast BLK2, IV2
    aesenclast BLK3, IV3
    aesenclast RDK2, IV4                    // Blocks 4-7 produce the plain key stream.
    aesenclast RDK2, IV5
    aesenclast RDK2, IV6
    aesenclast RDK2, IV7

    vmovdqu IV0, (OUTPUT)                   // The first four ciphertexts are stored in out.
    vmovdqu IV1, 16(OUTPUT)
    vmovdqu IV2, 32(OUTPUT)
    vmovdqu IV3, 48(OUTPUT)
    vpxor   64(INPUT), IV4, BLK0            // Key stream ^ Plaintext.
    vpxor   80(INPUT), IV5, BLK1
    vpxor   96(INPUT), IV6, BLK2
    vpxor   112(INPUT), IV7, BLK3

    vmovdqu BLK0, 64(OUTPUT)
    vmovdqu BLK1, 80(OUTPUT)
    vmovdqu BLK2, 96(OUTPUT)                // The last four ciphertexts are stored in out.
    vmovdqu BLK3, 112(OUTPUT)
    vmovdqa (%rsp), IV0                     // Reads the next round of ctr from the stack.
    vmovdqa 16(%rsp), IV1
    vmovdqa 32(%rsp), IV2
    vmovdqa 48(%rsp), IV3
    vmovdqa 64(%rsp), IV4
    vmovdqa 80(%rsp), IV5
    vmovdqa 96(%rsp), IV6
    vmovdqa 112(%rsp), IV7
    UPDATE_DATA INPUT, OUTPUT, LEN, 128
    testl   LEN, LEN
    jnz     .Lctr_aesenc_8_blks_enc_loop
    jmp     .Lctr_aesenc_finish

.Lctr_enc_proc_1_blk:
    movl    240(KEY), ROUNDS
    movq    KEY, KTMP
    decl    ROUNDS                          // The last round is done with aesenclast after the loop.
.align 16
.Laesenc_loop:
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenc  RDK, IV0
    decl    ROUNDS
    jnz     .Laesenc_loop                   // Loop the loop until the ROUNDS is 0.
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenclast RDK, IV0
    addl    $1, %r11d                       // Update ctr32.
    vpxor   (INPUT), IV0, BLK0
    vmovdqu BLK0, (OUTPUT)                  // Ciphertext stored in out.
    UPDATE_DATA INPUT, OUTPUT, LEN, 16
    jmp     .Lctr_enc_proc_8_blk

.Lctr_enc_proc_2_blk:
    movl    240(KEY), ROUNDS
    movq    KEY, KTMP
    decl    ROUNDS
    XOR_KEY %r11d, 1, %r10d, %r9d, 16, %rsp
    vmovdqa 16(%rsp), IV1
.align 16
.Laesenc_2_blks_loop:
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenc  RDK, IV0
    aesenc  RDK, IV1
    decl    ROUNDS
    jnz     .Laesenc_2_blks_loop
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenclast RDK, IV0
    aesenclast RDK, IV1

    vpxor   (INPUT), IV0, BLK0
    vpxor   16(INPUT), IV1, BLK1
    vmovdqu BLK0, (OUTPUT)
    vmovdqu BLK1, 16(OUTPUT)
    addl    $2, %r11d
    UPDATE_DATA INPUT, OUTPUT, LEN, 32
    jmp     .Lctr_enc_proc_8_blk

.Lctr_enc_proc_3_blk:
    movl    240(KEY), ROUNDS
    movq    KEY, KTMP
    decl    ROUNDS
    XOR_KEY %r11d, 1, %r10d, %r9d, 16, %rsp
    XOR_KEY %r11d, 2, %r10d, %r9d, 32, %rsp
    vmovdqa 16(%rsp), IV1
    vmovdqa 32(%rsp), IV2
.align 16
.Laesenc_3_blks_loop:
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenc  RDK, IV0
    aesenc  RDK, IV1
    aesenc  RDK, IV2
    decl    ROUNDS
    jnz     .Laesenc_3_blks_loop
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenclast RDK, IV0
    aesenclast RDK, IV1
    aesenclast RDK, IV2

    vpxor   (INPUT), IV0, BLK0
    vpxor   16(INPUT), IV1, BLK1
    vpxor   32(INPUT), IV2, BLK2

    vmovdqu BLK0, (OUTPUT)
    vmovdqu BLK1, 16(OUTPUT)
    vmovdqu BLK2, 32(OUTPUT)
    addl    $3, %r11d
    UPDATE_DATA INPUT, OUTPUT, LEN, 48
    jmp     .Lctr_enc_proc_8_blk

.Lctr_enc_proc_4_blk:
    movl    240(KEY), ROUNDS
    movq    KEY, KTMP
    decl    ROUNDS
    XOR_KEY %r11d, 1, %r10d, %r9d, 16, %rsp
    XOR_KEY %r11d, 2, %r10d, %r9d, 32, %rsp
    XOR_KEY %r11d, 3, %r10d, %r9d, 48, %rsp
    vmovdqa 16(%rsp), IV1
    vmovdqa 32(%rsp), IV2
    vmovdqa 48(%rsp), IV3
.align 16
.Laesenc_4_blks_loop:
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenc  RDK, IV0
    aesenc  RDK, IV1
    aesenc  RDK, IV2
    aesenc  RDK, IV3
    decl    ROUNDS
    jnz     .Laesenc_4_blks_loop
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenclast RDK, IV0
    aesenclast RDK, IV1
    aesenclast RDK, IV2
    aesenclast RDK, IV3

    vpxor   (INPUT), IV0, BLK0
    vpxor   16(INPUT), IV1, BLK1
    vpxor   32(INPUT), IV2, BLK2
    vpxor   48(INPUT), IV3, BLK3

    vmovdqu BLK0, (OUTPUT)
    vmovdqu BLK1, 16(OUTPUT)
    vmovdqu BLK2, 32(OUTPUT)
    vmovdqu BLK3, 48(OUTPUT)
    addl    $4, %r11d
    UPDATE_DATA INPUT, OUTPUT, LEN, 64
    jmp     .Lctr_enc_proc_8_blk

.Lctr_enc_proc_5_blk:
    movl    240(KEY), ROUNDS
    movq    KEY, KTMP
    decl    ROUNDS
    XOR_KEY %r11d, 1, %r10d, %r9d, 16, %rsp
    XOR_KEY %r11d, 2, %r10d, %r9d, 32, %rsp
    XOR_KEY %r11d, 3, %r10d, %r9d, 48, %rsp
    XOR_KEY %r11d, 4, %r10d, %r9d, 64, %rsp
    vmovdqa 16(%rsp), IV1
    vmovdqa 32(%rsp), IV2
    vmovdqa 48(%rsp), IV3
    vmovdqa 64(%rsp), IV4
.align 16
.Laesenc_5_blks_loop:
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenc  RDK, IV0
    aesenc  RDK, IV1
    aesenc  RDK, IV2
    aesenc  RDK, IV3
    aesenc  RDK, IV4
    decl    ROUNDS
    jnz     .Laesenc_5_blks_loop
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenclast RDK, IV0
    aesenclast RDK, IV1
    aesenclast RDK, IV2
    aesenclast RDK, IV3
    aesenclast RDK, IV4

    vpxor   (INPUT), IV0, BLK0
    vpxor   16(INPUT), IV1, BLK1
    vpxor   32(INPUT), IV2, BLK2
    vpxor   48(INPUT), IV3, BLK3
    vpxor   64(INPUT), IV4, BLK4
    vmovdqu BLK0, (OUTPUT)
    vmovdqu BLK1, 16(OUTPUT)
    vmovdqu BLK2, 32(OUTPUT)
    vmovdqu BLK3, 48(OUTPUT)
    vmovdqu BLK4, 64(OUTPUT)
    addl    $5, %r11d
    UPDATE_DATA INPUT, OUTPUT, LEN, 80
    jmp     .Lctr_enc_proc_8_blk

.Lctr_enc_proc_6_blk:
    movl    240(KEY), ROUNDS
    movq    KEY, KTMP
    decl    ROUNDS
    XOR_KEY %r11d, 1, %r10d, %r9d, 16, %rsp
    XOR_KEY %r11d, 2, %r10d, %r9d, 32, %rsp
    XOR_KEY %r11d, 3, %r10d, %r9d, 48, %rsp
    XOR_KEY %r11d, 4, %r10d, %r9d, 64, %rsp
    XOR_KEY %r11d, 5, %r10d, %r9d, 80, %rsp
    vmovdqa 16(%rsp), IV1
    vmovdqa 32(%rsp), IV2
    vmovdqa 48(%rsp), IV3
    vmovdqa 64(%rsp), IV4
    vmovdqa 80(%rsp), IV5
.align 16
.Laesenc_6_blks_loop:
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenc  RDK, IV0
    aesenc  RDK, IV1
    aesenc  RDK, IV2
    aesenc  RDK, IV3
    aesenc  RDK, IV4
    aesenc  RDK, IV5
    decl    ROUNDS
    jnz     .Laesenc_6_blks_loop
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenclast RDK, IV0
    aesenclast RDK, IV1
    aesenclast RDK, IV2
    aesenclast RDK, IV3
    aesenclast RDK, IV4
    aesenclast RDK, IV5

    vpxor   (INPUT), IV0, BLK0
    vpxor   16(INPUT), IV1, BLK1
    vpxor   32(INPUT), IV2, BLK2
    vpxor   48(INPUT), IV3, BLK3
    vpxor   64(INPUT), IV4, BLK4
    vpxor   80(INPUT), IV5, BLK5
    vmovdqu BLK0, (OUTPUT)
    vmovdqu BLK1, 16(OUTPUT)
    vmovdqu BLK2, 32(OUTPUT)
    vmovdqu BLK3, 48(OUTPUT)
    vmovdqu BLK4, 64(OUTPUT)
    vmovdqu BLK5, 80(OUTPUT)
    addl    $6, %r11d
    UPDATE_DATA INPUT, OUTPUT, LEN, 96

    jmp     .Lctr_enc_proc_8_blk

.Lctr_enc_proc_7_blk:
    movl    240(KEY), ROUNDS
    movq    KEY, KTMP
    decl    ROUNDS
    XOR_KEY %r11d, 1, %r10d, %r9d, 16, %rsp
    XOR_KEY %r11d, 2, %r10d, %r9d, 32, %rsp
    XOR_KEY %r11d, 3, %r10d, %r9d, 48, %rsp
    XOR_KEY %r11d, 4, %r10d, %r9d, 64, %rsp
    XOR_KEY %r11d, 5, %r10d, %r9d, 80, %rsp
    XOR_KEY %r11d, 6, %r10d, %r9d, 96, %rsp
    vmovdqa 16(%rsp), IV1
    vmovdqa 32(%rsp), IV2
    vmovdqa 48(%rsp), IV3
    vmovdqa 64(%rsp), IV4
    vmovdqa 80(%rsp), IV5
    vmovdqa 96(%rsp), IV6

.align 16
.Laesenc_7_blks_loop:
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenc  RDK, IV0
    aesenc  RDK, IV1
    aesenc  RDK, IV2
    aesenc  RDK, IV3
    aesenc  RDK, IV4
    aesenc  RDK, IV5
    aesenc  RDK, IV6
    decl    ROUNDS
    jnz     .Laesenc_7_blks_loop
    leaq    16(KTMP), KTMP
    vmovdqu (KTMP), RDK
    aesenclast RDK, IV0
    aesenclast RDK, IV1
    aesenclast RDK, IV2
    aesenclast RDK, IV3
    aesenclast RDK, IV4
    aesenclast RDK, IV5
    aesenclast RDK, IV6
    vpxor   (INPUT), IV0, BLK0
    vpxor   16(INPUT), IV1, BLK1
    vpxor   32(INPUT), IV2, BLK2
    vpxor   48(INPUT), IV3, BLK3
    vmovdqu BLK0, (OUTPUT)
    vmovdqu BLK1, 16(OUTPUT)
    vmovdqu BLK2, 32(OUTPUT)
    vmovdqu BLK3, 48(OUTPUT)
    vpxor   64(INPUT), IV4, BLK0            // BLK0-2 are reused for blocks 4-6.
    vpxor   80(INPUT), IV5, BLK1
    vpxor   96(INPUT), IV6, BLK2
    vmovdqu BLK0, 64(OUTPUT)
    vmovdqu BLK1, 80(OUTPUT)
    vmovdqu BLK2, 96(OUTPUT)
    addl    $7, %r11d
    UPDATE_DATA INPUT, OUTPUT, LEN, 112
    jmp     .Lctr_enc_proc_8_blk

.Lctr_aesenc_finish:
    bswap   %r11d
    movl    %r11d, 12(CTR_IV)               // Write the advanced counter back to the counter block.

    // Wipe key-dependent state: key stream blocks, both round-key registers,
    // the key-derived scalar temporaries and the stack scratch area.
    vpxor   IV0, IV0, IV0
    vpxor   IV1, IV1, IV1
    vpxor   IV2, IV2, IV2
    vpxor   IV3, IV3, IV3
    vpxor   IV4, IV4, IV4
    vpxor   IV5, IV5, IV5
    vpxor   IV6, IV6, IV6
    vpxor   IV7, IV7, IV7
    vpxor   RDK, RDK, RDK
    vpxor   RDK2, RDK2, RDK2                // Holds the last round key after the 8-block path.
    xorl    %r9d, %r9d                      // Held a word of the round 0 key.
    xorl    %r10d, %r10d                    // Held a counter word XORed with that key word.
    vmovdqa IV0, 0(%rsp)
    vmovdqa IV0, 16(%rsp)
    vmovdqa IV0, 32(%rsp)
    vmovdqa IV0, 48(%rsp)
    vmovdqa IV0, 64(%rsp)
    vmovdqa IV0, 80(%rsp)
    vmovdqa IV0, 96(%rsp)
    vmovdqa IV0, 112(%rsp)

    movq    %r12, %rsp                      // Drop the aligned scratch area.
    .cfi_def_cfa_register %rsp
    popq    %r15
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r15
    popq    %r14
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r14
    popq    %r13
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r13
    popq    %r12
    .cfi_adjust_cfa_offset -8
    .cfi_restore %r12

    xorl    RET, RET                        // Always returns 0.
    ret
    .cfi_endproc
    .size CRYPT_AES_CTR_Encrypt, .-CRYPT_AES_CTR_Encrypt

#endif