/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CBC)

#include "crypt_aes_macro_x86_64.s"

.file   "crypt_aes_cbc_x86_64.S"
.text

/* Syntax: GNU as, AT&T operand order (source first, destination last). */

/* Argument registers as used by both entry points below. */
.set    ARG1, %rdi
.set    ARG2, %rsi
.set    ARG3, %rdx
.set    ARG4, %ecx
.set    ARG5, %r8
.set    ARG6, %r9

/* Scalar working registers. KEY aliases ARG1, KTMP aliases ARG6, RET aliases ROUNDS. */
.set    RDK, %xmm3
.set    KEY, %rdi
.set    KTMP, %r9
.set    ROUNDS, %eax
.set    RET, %eax

/* Data blocks and chaining values (decrypt uses all of them, encrypt only BLK0/IV0). */
.set    BLK0, %xmm1
.set    BLK1, %xmm4
.set    BLK2, %xmm5
.set    BLK3, %xmm6
.set    BLK4, %xmm10
.set    BLK5, %xmm11
.set    BLK6, %xmm12
.set    BLK7, %xmm13
.set    IV0, %xmm0
.set    IV1, %xmm7
.set    IV2, %xmm8
.set    IV3, %xmm9

/* Round keys cached in registers by the encrypt path (these overlap BLK1-7 and IV1-3). */
.set    KEY1, %xmm4
.set    KEY2, %xmm5
.set    KEY3, %xmm6
.set    KEY4, %xmm10
.set    KEY5, %xmm11
.set    KEY6, %xmm12
.set    KEY7, %xmm13
.set    KEY8, %xmm14
.set    KEY9, %xmm15
.set    KEY10, %xmm2
.set    KEY11, %xmm7
.set    KEY12, %xmm8
.set    KEY13, %xmm9
.set    KEYTEMP, %xmm3

/**
 *  Function description: AES encryption in CBC mode, accelerated with AES-NI instructions.
 *  Function prototype: int32_t CRYPT_AES_CBC_Encrypt(const CRYPT_AES_Key *ctx,
 *                                                    const uint8_t *in,
 *                                                    uint8_t *out,
 *                                                    uint32_t len,
 *                                                    uint8_t *iv);
 *  Input register:
 *        rdi: pointer to the key structure (round keys from offset 0, round count at offset 240)
 *        rsi: pointer to the input data
 *        rdx: pointer to the output data
 *        ecx: length of the input data in bytes; expected to be a multiple of 16.
 *             Only whole 16-byte blocks are processed, trailing bytes are ignored.
 *        r8:  pointer to the 16-byte CBC chaining value (IV); updated with the last
 *             ciphertext block when at least one block was processed
 *  Change register: xmm0-xmm15, rsi, rdx, ecx, eax
 *  Output register: eax (always 0)
 *  Function/Macro Call: None
 */
    .globl  CRYPT_AES_CBC_Encrypt
    .type   CRYPT_AES_CBC_Encrypt, @function
    .align  16
CRYPT_AES_CBC_Encrypt:
    .cfi_startproc
    cmpl        $16, ARG4
    jb          .Laescbcend_end             // less than one block: nothing to do
    movl        240(KEY), ROUNDS
    vmovdqu     (ARG5), IV0
    // Round keys 0..10 are needed by every key size, keep them in registers.
    vmovdqu     (KEY), KEY1
    vmovdqu     16(KEY), KEY2
    vmovdqu     32(KEY), KEY3
    vmovdqu     48(KEY), KEY4
    vmovdqu     64(KEY), KEY5
    vmovdqu     80(KEY), KEY6
    vmovdqu     96(KEY), KEY7
    vmovdqu     112(KEY), KEY8
    vmovdqu     128(KEY), KEY9
    vmovdqu     144(KEY), KEY10
    vmovdqu     160(KEY), KEY11
    // Dispatch on the round count: below 12 -> 128-bit path, 12 -> 192-bit path,
    // anything else falls through to the 256-bit path.
    cmpl        $12, ROUNDS
    jb          .Laes_128_cbc_start
    je          .Laes_192_cbc_start

.align 16
.Laes_256_cbc_start:
    vmovdqu     176(KEY), KEY12
    vmovdqu     192(KEY), KEY13
.Laes_256_cbc_loop:
    vpxor       (ARG2), IV0, BLK0           // BLK0 = plaintext ^ chaining value
    vmovdqu     208(KEY), KEYTEMP           // round keys 13 and 14 share KEYTEMP, so
                                            // they are reloaded on every iteration
    vpxor       BLK0, KEY1, BLK0
    aesenc      KEY2, BLK0
    aesenc      KEY3, BLK0
    aesenc      KEY4, BLK0
    aesenc      KEY5, BLK0
    aesenc      KEY6, BLK0
    aesenc      KEY7, BLK0
    aesenc      KEY8, BLK0
    aesenc      KEY9, BLK0
    aesenc      KEY10, BLK0
    aesenc      KEY11, BLK0
    aesenc      KEY12, BLK0
    aesenc      KEY13, BLK0
    aesenc      KEYTEMP, BLK0
    vmovdqu     224(KEY), KEYTEMP
    aesenclast  KEYTEMP, BLK0
    leaq        16(ARG2), ARG2
    vmovdqu     BLK0, (ARG3)
    movdqa      BLK0, IV0                   // ciphertext chains into the next block
    leaq        16(ARG3), ARG3
    subl        $16, ARG4
    cmpl        $16, ARG4
    jae         .Laes_256_cbc_loop          // continue while a whole block remains
    // Wipe the round keys that only this path loaded.
    vpxor       KEY12, KEY12, KEY12
    vpxor       KEY13, KEY13, KEY13
    vpxor       KEYTEMP, KEYTEMP, KEYTEMP
    jmp         .Laescbcenc_finish

.align 16
.Laes_192_cbc_start:
    vmovdqu     176(KEY), KEY12
    vmovdqu     192(KEY), KEY13
.Laes_192_cbc_loop:
    vpxor       (ARG2), IV0, BLK0           // BLK0 = plaintext ^ chaining value
    vpxor       BLK0, KEY1, BLK0
    aesenc      KEY2, BLK0
    aesenc      KEY3, BLK0
    aesenc      KEY4, BLK0
    aesenc      KEY5, BLK0
    aesenc      KEY6, BLK0
    aesenc      KEY7, BLK0
    aesenc      KEY8, BLK0
    aesenc      KEY9, BLK0
    aesenc      KEY10, BLK0
    aesenc      KEY11, BLK0
    aesenc      KEY12, BLK0
    aesenclast  KEY13, BLK0
    leaq        16(ARG2), ARG2
    vmovdqu     BLK0, (ARG3)
    movdqa      BLK0, IV0
    leaq        16(ARG3), ARG3
    subl        $16, ARG4
    cmpl        $16, ARG4
    jae         .Laes_192_cbc_loop          // whole-block check: a length that is not a
                                            // multiple of 16 must not wrap the counter
    vpxor       KEY12, KEY12, KEY12
    vpxor       KEY13, KEY13, KEY13
    jmp         .Laescbcenc_finish

.align 16
.Laes_128_cbc_start:
    vpxor       (ARG2), IV0, BLK0           // BLK0 = plaintext ^ chaining value
    vpxor       BLK0, KEY1, BLK0
    aesenc      KEY2, BLK0
    aesenc      KEY3, BLK0
    aesenc      KEY4, BLK0
    aesenc      KEY5, BLK0
    aesenc      KEY6, BLK0
    aesenc      KEY7, BLK0
    aesenc      KEY8, BLK0
    aesenc      KEY9, BLK0
    aesenc      KEY10, BLK0
    aesenclast  KEY11, BLK0
    leaq        16(ARG2), ARG2
    vmovdqu     BLK0, (ARG3)
    movdqa      BLK0, IV0
    leaq        16(ARG3), ARG3
    subl        $16, ARG4
    cmpl        $16, ARG4
    jae         .Laes_128_cbc_start         // same whole-block check as the other paths
    // falls through to the common epilogue

.Laescbcenc_finish:
    vmovdqu     BLK0, (ARG5)                // hand the last ciphertext block back as IV
    // Wipe the cached round keys from the vector registers.
    vpxor       KEY1, KEY1, KEY1
    vpxor       KEY2, KEY2, KEY2
    vpxor       KEY3, KEY3, KEY3
    vpxor       KEY4, KEY4, KEY4
    vpxor       KEY5, KEY5, KEY5
    vpxor       KEY6, KEY6, KEY6
    vpxor       KEY7, KEY7, KEY7
    vpxor       KEY8, KEY8, KEY8
    vpxor       KEY9, KEY9, KEY9
    vpxor       KEY10, KEY10, KEY10
    vpxor       KEY11, KEY11, KEY11
.Laescbcend_end:
    xorl        RET, RET
    ret
    .cfi_endproc
    .size CRYPT_AES_CBC_Encrypt, .-CRYPT_AES_CBC_Encrypt

/**
 *  Function description: AES decryption in CBC mode, accelerated with AES-NI instructions.
 *                        Up to eight blocks are decrypted per pass.
 *  Function prototype: int32_t CRYPT_AES_CBC_Decrypt(const CRYPT_AES_Key *ctx,
 *                                                    const uint8_t *in,
 *                                                    uint8_t *out,
 *                                                    uint32_t len,
 *                                                    uint8_t *iv);
 *  Input register:
 *        rdi: pointer to the key structure (first round key at offset 0, round count at offset 240)
 *        rsi: pointer to the input data
 *        rdx: pointer to the output data
 *        ecx: length of the input data in bytes; expected to be a multiple of 16.
 *             Only whole 16-byte blocks are processed, trailing bytes are ignored.
 *        r8:  pointer to the 16-byte CBC chaining value (IV); updated with the last
 *             ciphertext block consumed (rewritten unchanged when no block was processed)
 *  Change register: xmm0-xmm13, rsi, rdx, ecx, eax, r9 (the AES_DEC_* macros are defined in
 *                   crypt_aes_macro_x86_64.s and may change more, e.g. the key pointer
 *                   they are given - confirm against that file)
 *  Output register: eax (always 0)
 *  Function/Macro Call: AES_DEC_1_BLK, AES_DEC_2_BLKS ... AES_DEC_8_BLKS
 */
    .globl  CRYPT_AES_CBC_Decrypt
    .type   CRYPT_AES_CBC_Decrypt, @function
    .align  16
CRYPT_AES_CBC_Decrypt:
    .cfi_startproc
    vmovdqu     (ARG5), IV0

    // Dispatch on the number of bytes left (ARG4). Full groups of eight blocks go
    // through the loop; a shorter tail is handled once by the matching handler.
.Laes_cbc_dec_start:
    cmpl        $64, ARG4
    jae         .Labove_equal_4_blks
    cmpl        $32, ARG4
    jae         .Labove_equal_2_blks
    cmpl        $16, ARG4
    jb          .Laes_cbc_dec_finish        // no whole block left: never touch a partial block
    jmp         .Lproc_1_blk

.Labove_equal_2_blks:
    cmpl        $48, ARG4
    jb          .Lproc_2_blks
    jmp         .Lproc_3_blks

.Labove_equal_4_blks:
    cmpl        $96, ARG4
    jae         .Labove_equal_6_blks
    cmpl        $80, ARG4
    jb          .Lproc_4_blks
    jmp         .Lproc_5_blks

.Labove_equal_6_blks:
    cmpl        $112, ARG4
    jb          .Lproc_6_blks
    cmpl        $128, ARG4
    jb          .Lproc_7_blks

.align 16
.Lproc_8_blks:
.Laescbcdec_8_blks_loop:
    vmovdqu     (ARG2), BLK0
    vmovdqu     16(ARG2), BLK1
    vmovdqu     32(ARG2), BLK2
    // Keep copies of the first three ciphertext blocks: they are the chaining
    // values for blocks 1..3. Later chaining values are re-read from the input.
    movdqa      BLK0, IV1
    movdqa      BLK1, IV2
    movdqa      BLK2, IV3
    movq        KEY, KTMP                   // the macro gets a copy so KEY stays valid for the
                                            // next pass (presumably the macro advances it)
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    vpxor       BLK0, RDK, BLK0             // initial round-key XOR for all eight blocks
    vpxor       BLK1, RDK, BLK1
    vpxor       BLK2, RDK, BLK2
    vpxor       48(ARG2), RDK, BLK3
    vpxor       64(ARG2), RDK, BLK4
    vpxor       80(ARG2), RDK, BLK5
    vpxor       96(ARG2), RDK, BLK6
    vpxor       112(ARG2), RDK, BLK7
    decl        ROUNDS
    AES_DEC_8_BLKS KTMP ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6 BLK7
    // CBC unchaining: block i ^= ciphertext block i-1 (IV0 for block 0).
    vpxor       BLK0, IV0, BLK0
    vpxor       BLK1, IV1, BLK1
    vpxor       BLK2, IV2, BLK2
    vpxor       BLK3, IV3, BLK3
    vpxor       48(ARG2), BLK4, BLK4
    vpxor       64(ARG2), BLK5, BLK5
    vpxor       80(ARG2), BLK6, BLK6
    vpxor       96(ARG2), BLK7, BLK7
    vmovdqu     112(ARG2), IV0              // last ciphertext block chains into the next pass;
                                            // read before any store so in == out stays correct
    vmovdqu     BLK0, (ARG3)
    vmovdqu     BLK1, 16(ARG3)
    vmovdqu     BLK2, 32(ARG3)
    vmovdqu     BLK3, 48(ARG3)
    vmovdqu     BLK4, 64(ARG3)
    vmovdqu     BLK5, 80(ARG3)
    vmovdqu     BLK6, 96(ARG3)
    vmovdqu     BLK7, 112(ARG3)
    subl        $128, ARG4
    leaq        128(ARG2), ARG2
    leaq        128(ARG3), ARG3
    cmpl        $128, ARG4
    jb          .Laes_cbc_dec_start         // fewer than eight blocks left: dispatch the tail
    jmp         .Laescbcdec_8_blks_loop

    // Tail handlers: each one runs at most once and then jumps to the epilogue,
    // so they pass KEY itself to the macro and do not advance ARG2/ARG3/ARG4.
.align 16
.Lproc_1_blk:
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    vpxor       (ARG2), RDK, BLK0
    decl        ROUNDS
    AES_DEC_1_BLK KEY ROUNDS RDK BLK0
    vpxor       BLK0, IV0, BLK0
    vmovdqu     (ARG2), IV0
    vmovdqu     BLK0, (ARG3)
    jmp         .Laes_cbc_dec_finish

.align 16
.Lproc_2_blks:
    vmovdqu     (ARG2), BLK0
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    movdqa      BLK0, IV1
    vpxor       BLK0, RDK, BLK0
    vpxor       16(ARG2), RDK, BLK1
    decl        ROUNDS
    AES_DEC_2_BLKS KEY ROUNDS RDK BLK0 BLK1
    vpxor       BLK0, IV0, BLK0
    vpxor       BLK1, IV1, BLK1
    vmovdqu     16(ARG2), IV0
    vmovdqu     BLK0, (ARG3)
    vmovdqu     BLK1, 16(ARG3)
    jmp         .Laes_cbc_dec_finish

.align 16
.Lproc_3_blks:
    vmovdqu     (ARG2), BLK0
    vmovdqu     16(ARG2), BLK1
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    movdqa      BLK0, IV1
    movdqa      BLK1, IV2
    vpxor       BLK0, RDK, BLK0
    vpxor       BLK1, RDK, BLK1
    vpxor       32(ARG2), RDK, BLK2
    decl        ROUNDS
    AES_DEC_3_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2
    vpxor       BLK0, IV0, BLK0
    vpxor       BLK1, IV1, BLK1
    vpxor       BLK2, IV2, BLK2
    vmovdqu     32(ARG2), IV0
    vmovdqu     BLK0, (ARG3)
    vmovdqu     BLK1, 16(ARG3)
    vmovdqu     BLK2, 32(ARG3)
    jmp         .Laes_cbc_dec_finish

.align 16
.Lproc_4_blks:
    vmovdqu     (ARG2), BLK0
    vmovdqu     16(ARG2), BLK1
    vmovdqu     32(ARG2), BLK2
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    movdqa      BLK0, IV1
    movdqa      BLK1, IV2
    movdqa      BLK2, IV3
    vpxor       BLK0, RDK, BLK0
    vpxor       BLK1, RDK, BLK1
    vpxor       BLK2, RDK, BLK2
    vpxor       48(ARG2), RDK, BLK3
    decl        ROUNDS
    AES_DEC_4_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3
    vpxor       BLK0, IV0, BLK0
    vpxor       BLK1, IV1, BLK1
    vpxor       BLK2, IV2, BLK2
    vpxor       BLK3, IV3, BLK3
    vmovdqu     48(ARG2), IV0
    vmovdqu     BLK0, (ARG3)
    vmovdqu     BLK1, 16(ARG3)
    vmovdqu     BLK2, 32(ARG3)
    vmovdqu     BLK3, 48(ARG3)
    jmp         .Laes_cbc_dec_finish

.align 16
.Lproc_5_blks:
    vmovdqu     (ARG2), BLK0
    vmovdqu     16(ARG2), BLK1
    vmovdqu     32(ARG2), BLK2
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    movdqa      BLK0, IV1
    movdqa      BLK1, IV2
    movdqa      BLK2, IV3
    vpxor       BLK0, RDK, BLK0
    vpxor       BLK1, RDK, BLK1
    vpxor       BLK2, RDK, BLK2
    vpxor       48(ARG2), RDK, BLK3
    vpxor       64(ARG2), RDK, BLK4
    decl        ROUNDS
    AES_DEC_5_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4
    vpxor       BLK0, IV0, BLK0
    vpxor       BLK1, IV1, BLK1
    vpxor       BLK2, IV2, BLK2
    vpxor       BLK3, IV3, BLK3
    vpxor       48(ARG2), BLK4, BLK4
    vmovdqu     64(ARG2), IV0
    vmovdqu     BLK0, (ARG3)
    vmovdqu     BLK1, 16(ARG3)
    vmovdqu     BLK2, 32(ARG3)
    vmovdqu     BLK3, 48(ARG3)
    vmovdqu     BLK4, 64(ARG3)
    jmp         .Laes_cbc_dec_finish

.align 16
.Lproc_6_blks:
    vmovdqu     (ARG2), BLK0
    vmovdqu     16(ARG2), BLK1
    vmovdqu     32(ARG2), BLK2
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    movdqa      BLK0, IV1
    movdqa      BLK1, IV2
    movdqa      BLK2, IV3
    vpxor       BLK0, RDK, BLK0             // reuse the blocks already loaded above
    vpxor       BLK1, RDK, BLK1
    vpxor       BLK2, RDK, BLK2
    vpxor       48(ARG2), RDK, BLK3
    vpxor       64(ARG2), RDK, BLK4
    vpxor       80(ARG2), RDK, BLK5
    decl        ROUNDS
    AES_DEC_6_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5
    vpxor       BLK0, IV0, BLK0
    vpxor       BLK1, IV1, BLK1
    vpxor       BLK2, IV2, BLK2
    vpxor       BLK3, IV3, BLK3
    vpxor       48(ARG2), BLK4, BLK4
    vpxor       64(ARG2), BLK5, BLK5
    vmovdqu     80(ARG2), IV0
    vmovdqu     BLK0, (ARG3)
    vmovdqu     BLK1, 16(ARG3)
    vmovdqu     BLK2, 32(ARG3)
    vmovdqu     BLK3, 48(ARG3)
    vmovdqu     BLK4, 64(ARG3)
    vmovdqu     BLK5, 80(ARG3)
    jmp         .Laes_cbc_dec_finish

.align 16
.Lproc_7_blks:
    vmovdqu     (ARG2), BLK0
    vmovdqu     16(ARG2), BLK1
    vmovdqu     32(ARG2), BLK2
    movl        240(KEY), ROUNDS
    vmovdqu     (KEY), RDK
    movdqa      BLK0, IV1
    movdqa      BLK1, IV2
    movdqa      BLK2, IV3
    vpxor       BLK0, RDK, BLK0             // reuse the blocks already loaded above
    vpxor       BLK1, RDK, BLK1
    vpxor       BLK2, RDK, BLK2
    vpxor       48(ARG2), RDK, BLK3
    vpxor       64(ARG2), RDK, BLK4
    vpxor       80(ARG2), RDK, BLK5
    vpxor       96(ARG2), RDK, BLK6
    decl        ROUNDS
    AES_DEC_7_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6
    vpxor       BLK0, IV0, BLK0
    vpxor       BLK1, IV1, BLK1
    vpxor       BLK2, IV2, BLK2
    vpxor       BLK3, IV3, BLK3
    vpxor       48(ARG2), BLK4, BLK4
    vpxor       64(ARG2), BLK5, BLK5
    vpxor       80(ARG2), BLK6, BLK6
    vmovdqu     96(ARG2), IV0
    vmovdqu     BLK0, (ARG3)
    vmovdqu     BLK1, 16(ARG3)
    vmovdqu     BLK2, 32(ARG3)
    vmovdqu     BLK3, 48(ARG3)
    vmovdqu     BLK4, 64(ARG3)
    vmovdqu     BLK5, 80(ARG3)
    vmovdqu     BLK6, 96(ARG3)
    // falls through to the common epilogue

.align 16
.Laes_cbc_dec_finish:
    vmovdqu     IV0, (ARG5)                 // hand the chaining value back to the caller
    // Wipe decrypted data and the round key from the vector registers.
    vpxor       BLK0, BLK0, BLK0
    vpxor       BLK1, BLK1, BLK1
    vpxor       BLK2, BLK2, BLK2
    vpxor       BLK3, BLK3, BLK3
    vpxor       BLK4, BLK4, BLK4
    vpxor       BLK5, BLK5, BLK5
    vpxor       BLK6, BLK6, BLK6
    vpxor       BLK7, BLK7, BLK7
    vpxor       RDK, RDK, RDK
    xorl        RET, RET
    ret
    .cfi_endproc
    .size CRYPT_AES_CBC_Decrypt, .-CRYPT_AES_CBC_Decrypt

#endif