/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_XTS)

#include "crypt_aes_macro_armv8.s"
#include "crypt_arm.h"

.file    "crypt_aes_xts_armv8.S"
.text
.arch    armv8-a+crypto

/*
 * Register roles shared by both entry points.
 * Several aliases name the same physical register (WP/WTMP2/XTMP2 = x11,
 * WC/WTMP3 = x12, WTMP1/XTMP1 = x10, TWW1/TWX1 = x14); they are never live
 * at the same time.
 */
KEY     .req    x0      // key context: round keys at offset 0, round count at offset 240
IN      .req    x1      // input cursor
OUT     .req    x2      // output cursor
LEN     .req    x3      // total length in bytes
TWEAK   .req    x4      // 16-byte tweak, read on entry and written back on exit
TMPOUT  .req    x17     // address of the previously written output block (tail handling)
WP      .req    w11     // tail byte taken from the input
WC      .req    w12     // tail byte taken from the previous output block

KTMP    .req    x5      // round-key cursor
LTMP    .req    x6      // bytes of whole blocks still to be processed
TAILNUM .req    x8      // LEN % 16
POS     .req    x16     // input rewind used by the 5-block pipeline

ROUNDS  .req    w7      // round count minus 6
XROUNDS .req    x7
TROUNDS .req    w15     // per-call round-loop counter

WTMP0   .req    w9      // constant 0x87 consumed by NextTweak
WTMP1   .req    w10
WTMP2   .req    w11
WTMP3   .req    w12

XTMP1   .req    x10
XTMP2   .req    x11

TWX0    .req    x13     // low 64 bits of the running tweak
TWX1    .req    x14     // high 64 bits of the running tweak
TWW1    .req    w14

BLK0    .req    v0
BLK1    .req    v1
BLK2    .req    v2
BLK3    .req    v3
BLK4    .req    v4

IN0     .req    v5
IN1     .req    v6
IN2     .req    v7
IN3     .req    v30
IN4     .req    v31

TWK0    .req    v8
TWK1    .req    v9
TWK2    .req    v10
TWK3    .req    v11
TWK4    .req    v12

TWKD00  .req    d8
TWKD10  .req    d9

TWKD20  .req    d10
TWKD30  .req    d11
TWKD40  .req    d12

#define TWKD01 v8.d[1]
#define TWKD11 v9.d[1]
#define TWKD21 v10.d[1]
#define TWKD31 v11.d[1]
#define TWKD41 v12.d[1]

RDK0    .req    v16
RDK1    .req    v17
RDK2    .req    v18
RDK3    .req    v19
RDK4    .req    v20
RDK5    .req    v21
RDK6    .req    v22
RDK7    .req    v23
RDK8    .req    v24

TMP0    .req    v25
TMP1    .req    v26
TMP2    .req    v27
TMP3    .req    v28
TMP4    .req    v29

/*
 * Copy the 128-bit value held in the general-purpose pair SRC1:SRC0 into a vector
 * register. DES0 is the d view of the vector, DES1 its upper 64-bit lane.
 * The d view must be written first because that write clears the upper lane.
 */
#define MOV_REG_TO_VEC(SRC0, SRC1, DES0, DES1)  \
    fmov DES0,SRC0 ;                            \
    fmov DES1,SRC1 ;

/*
 * NextTweak twkl, twkh, twkd0, twkd1
 * Advance the 128-bit tweak held in twkh:twkl: shift it left by one bit and, when
 * the bit shifted out of twkh was set, xor the constant held in WTMP0 into the low
 * half (both entry points load 0x87 into WTMP0). The new value is left in twkh:twkl
 * and also copied into the vector register whose d view is twkd0 and whose upper
 * lane is twkd1.
 * Clobbers XTMP1 and XTMP2. Does not modify the condition flags.
 */
.macro NextTweak twkl, twkh, twkd0, twkd1
    asr     XTMP2,\twkh,#63             // all ones when bit 127 is set, else zero
    extr    \twkh,\twkh,\twkl,#63       // high half of (tweak << 1)
    and     WTMP1,WTMP0,WTMP2           // reduction constant or zero
    eor     \twkl,XTMP1,\twkl,lsl#1     // low half of (tweak << 1) with the reduction folded in
    fmov    \twkd0,\twkl                // write the d view first: it clears the upper lane
    fmov    \twkd1,\twkh                // then fill the upper lane
.endm

/* One full AES round on a single block: aes<en> followed by aes<mc>. */
.macro AesCrypt1x en, mc, d0, rk
    aes\en  \d0\().16b, \rk\().16b
    aes\mc  \d0\().16b, \d0\().16b
.endm

/* One encryption round (aese + aesmc) on block d0 with round key rk. */
.macro AesEncrypt1x d0, rk
    AesCrypt1x e, mc, \d0, \rk
.endm

/* One decryption round (aesd + aesimc) on block d0 with round key rk. */
.macro AesDecrypt1x d0, rk
    AesCrypt1x d, imc, \d0, \rk
.endm

/**
 * int32_t CRYPT_AES_XTS_Encrypt(const CRYPT_AES_Key *ctx, const uint8_t *in, uint8_t *out, uint32_t len, const uint8_t *tweak);
 *
 * AES-XTS encryption of len bytes from in to out.
 *   x0 = ctx    key context; round keys start at offset 0, round count is read from offset 240
 *   x1 = in     input
 *   x2 = out    output
 *   x3 = len    length in bytes; a trailing partial block (len % 16 != 0) is folded into
 *               the last full block by swapping bytes with the previous output block
 *   x4 = tweak  16-byte tweak; the advanced tweak is written back here before returning
 * Always returns 0 in x0.
 * Whole blocks are processed 5, 3, 2 or 1 at a time depending on how many remain.
 * The tail path reads and rewrites the 16 bytes at out - 16, so it relies on at least
 * one full block having been written.
 * NOTE(review): len is documented as uint32_t but all 64 bits of x3 are used; this
 * presumes the caller passes it zero-extended - confirm.
 */
.globl CRYPT_AES_XTS_Encrypt
.type CRYPT_AES_XTS_Encrypt, %function
.align 4
CRYPT_AES_XTS_Encrypt:
AARCH64_PACIASP
    stp     x29, x30, [sp,#-80]!
    add     x29, sp, #0
    stp     d8, d9, [sp,#16]            // d8-d15 are callee-saved
    stp     d10, d11, [sp,#32]
    stp     d12, d13, [sp,#48]
    stp     d14, d15, [sp,#64]

    ld1     {TWK0.16b}, [TWEAK]
    and     TAILNUM, LEN, #0xF          // tail length, LEN % 16
    and     LTMP, LEN, #-16             // bytes of whole blocks
    mov     WTMP0,0x87                  // constant consumed by NextTweak
    ldr     ROUNDS,[KEY,#240]
    fmov    TWX0,TWKD00
    fmov    TWX1,TWKD01

    sub     ROUNDS,ROUNDS,#6            // preload the last 7 round keys into RDK2-RDK8
    add     KTMP,KEY,XROUNDS,lsl#4
    ld1     {RDK2.4s,RDK3.4s},[KTMP],#32
    ld1     {RDK4.4s,RDK5.4s},[KTMP],#32
    ld1     {RDK6.4s,RDK7.4s},[KTMP],#32
    ld1     {RDK8.4s},[KTMP]

    // Dispatch on the number of whole-block bytes still pending.
.Lxts_aesenc_start:
    cmp     LTMP, #80
    b.ge    .Lxts_enc_proc_5_blks
    cmp     LTMP, #48
    b.ge    .Lxts_enc_proc_3_blks
    cmp     LTMP, #32
    b.eq    .Lxts_enc_proc_2_blks
    cmp     LTMP, #16
    b.eq    .Lxts_enc_proc_1blk

    // No whole block left: finish, or handle the partial tail.
.Lxtx_tail_blk:
    fmov    TWX0,TWKD00                 // drop tweaks computed ahead by the 5-block pipeline
    fmov    TWX1,TWKD01
    cbz     TAILNUM,.Lxts_aesenc_finish
    // Build the final block in place of the previous output block: its leading
    // TAILNUM bytes become the short last output block and are replaced by the
    // input tail.
    sub     TMPOUT,OUT,#16
.Lxtx_tail_blk_loop:
    subs    TAILNUM,TAILNUM,1
    ldrb    WC,[TMPOUT,TAILNUM]
    ldrb    WP,[IN,TAILNUM]
    strb    WC,[OUT,TAILNUM]
    strb    WP,[TMPOUT,TAILNUM]
    b.gt    .Lxtx_tail_blk_loop
    ld1     {BLK0.16b}, [TMPOUT]
    mov     LTMP,#16                    // exactly one more block to encrypt
    mov     OUT,TMPOUT                  // and it overwrites the previous output block
    b       .Lxts_enc_proc_1blk_loaded

.Lxts_enc_proc_1blk:
    ld1     {BLK0.16b},[IN],#16
.Lxts_enc_proc_1blk_loaded:
    eor     BLK0.16b,BLK0.16b,TWK0.16b
    mov     KTMP, KEY
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    sub     TROUNDS,ROUNDS,#2
.Lxts_rounds_1blks:
    AesEncrypt1x BLK0,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesEncrypt1x BLK0,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_rounds_1blks

    AesEncrypt1x BLK0,RDK0
    AesEncrypt1x BLK0,RDK1

    // last 7 rounds
    AesEncrypt1x BLK0,RDK2
    AesEncrypt1x BLK0,RDK3
    AesEncrypt1x BLK0,RDK4
    AesEncrypt1x BLK0,RDK5
    AesEncrypt1x BLK0,RDK6

    aese    BLK0.16b,RDK7.16b           // final round
    eor     BLK0.16b,BLK0.16b,RDK8.16b
    eor     BLK0.16b,BLK0.16b,TWK0.16b

    st1     {BLK0.16b}, [OUT], #16

    NextTweak TWX0,TWX1,TWKD00,TWKD01

    subs    LTMP,LTMP,#16
    b.hs    .Lxts_aesenc_start

.Lxts_enc_proc_2_blks:
    ld1     {BLK0.16b, BLK1.16b}, [IN], #32
    mov     KTMP, KEY
    NextTweak TWX0,TWX1,TWKD10,TWKD11
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    sub     TROUNDS,ROUNDS,#2
    eor     BLK0.16b, BLK0.16b, TWK0.16b
    eor     BLK1.16b, BLK1.16b, TWK1.16b
.Lxts_rounds_2blks:
    AesEncrypt1x BLK0,RDK0
    AesEncrypt1x BLK1,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesEncrypt1x BLK0,RDK1
    AesEncrypt1x BLK1,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_rounds_2blks

    AesEncrypt1x BLK0,RDK0
    AesEncrypt1x BLK1,RDK0

    AesEncrypt1x BLK0,RDK1
    AesEncrypt1x BLK1,RDK1

    // last 7 rounds
    AesEncrypt1x BLK0,RDK2
    AesEncrypt1x BLK1,RDK2

    AesEncrypt1x BLK0,RDK3
    AesEncrypt1x BLK1,RDK3

    AesEncrypt1x BLK0,RDK4
    AesEncrypt1x BLK1,RDK4

    AesEncrypt1x BLK0,RDK5
    AesEncrypt1x BLK1,RDK5

    AesEncrypt1x BLK0,RDK6
    AesEncrypt1x BLK1,RDK6

    eor     TWK0.16b,TWK0.16b,RDK8.16b  // fold the last round key into the tweaks
    eor     TWK1.16b,TWK1.16b,RDK8.16b

    aese    BLK0.16b,RDK7.16b           // final round
    aese    BLK1.16b,RDK7.16b

    eor     BLK0.16b,BLK0.16b,TWK0.16b
    eor     BLK1.16b,BLK1.16b,TWK1.16b

    st1     {BLK0.16b, BLK1.16b}, [OUT], #32
    NextTweak TWX0,TWX1,TWKD00,TWKD01
    subs    LTMP,LTMP,#32
    b.hs    .Lxts_aesenc_start

.Lxts_enc_proc_3_blks:
    ld1     {BLK0.16b}, [IN], #16       // first block
    NextTweak TWX0,TWX1,TWKD10,TWKD11
    eor     BLK0.16b,BLK0.16b,TWK0.16b

    ld1     {BLK1.16b}, [IN], #16       // second block
    NextTweak TWX0,TWX1,TWKD20,TWKD21
    eor     BLK1.16b,BLK1.16b,TWK1.16b

    ld1     {BLK2.16b}, [IN], #16       // third block
    eor     BLK2.16b,BLK2.16b,TWK2.16b

    mov     KTMP, KEY
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    sub     TROUNDS,ROUNDS,#2

.Lxts_rounds_3blks:
    AesEncrypt1x BLK0,RDK0
    AesEncrypt1x BLK1,RDK0
    AesEncrypt1x BLK2,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesEncrypt1x BLK0,RDK1
    AesEncrypt1x BLK1,RDK1
    AesEncrypt1x BLK2,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_rounds_3blks

    AesEncrypt1x BLK0,RDK0
    AesEncrypt1x BLK1,RDK0
    AesEncrypt1x BLK2,RDK0

    AesEncrypt1x BLK0,RDK1
    AesEncrypt1x BLK1,RDK1
    AesEncrypt1x BLK2,RDK1

    // last 7 rounds
    AesEncrypt1x BLK0,RDK2
    AesEncrypt1x BLK1,RDK2
    AesEncrypt1x BLK2,RDK2

    AesEncrypt1x BLK0,RDK3
    AesEncrypt1x BLK1,RDK3
    AesEncrypt1x BLK2,RDK3

    AesEncrypt1x BLK0,RDK4
    AesEncrypt1x BLK1,RDK4
    AesEncrypt1x BLK2,RDK4

    AesEncrypt1x BLK0,RDK5
    AesEncrypt1x BLK1,RDK5
    AesEncrypt1x BLK2,RDK5

    AesEncrypt1x BLK0,RDK6
    AesEncrypt1x BLK1,RDK6
    AesEncrypt1x BLK2,RDK6

    eor     TWK0.16b,TWK0.16b,RDK8.16b  // fold the last round key into the tweaks
    eor     TWK1.16b,TWK1.16b,RDK8.16b
    eor     TWK2.16b,TWK2.16b,RDK8.16b

    aese    BLK0.16b,RDK7.16b           // final round
    aese    BLK1.16b,RDK7.16b
    aese    BLK2.16b,RDK7.16b

    eor     BLK0.16b,BLK0.16b,TWK0.16b
    eor     BLK1.16b,BLK1.16b,TWK1.16b
    eor     BLK2.16b,BLK2.16b,TWK2.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b}, [OUT], #48

    NextTweak TWX0,TWX1,TWKD00,TWKD01

    subs    LTMP,LTMP,#48
    b.hs    .Lxts_aesenc_start

.align 4
.Lxts_enc_proc_5_blks:
    ld1     {BLK0.16b}, [IN], #16       // first block
    NextTweak TWX0,TWX1,TWKD10,TWKD11
    eor     BLK0.16b,BLK0.16b,TWK0.16b

    ld1     {BLK1.16b}, [IN], #16       // second block
    NextTweak TWX0,TWX1,TWKD20,TWKD21
    eor     BLK1.16b,BLK1.16b,TWK1.16b
    sub     LTMP,LTMP,#32

    ld1     {BLK2.16b}, [IN], #16       // third block
    NextTweak TWX0,TWX1,TWKD30,TWKD31
    eor     BLK2.16b,BLK2.16b,TWK2.16b

    ld1     {BLK3.16b}, [IN], #16       // fourth block
    NextTweak TWX0,TWX1,TWKD40,TWKD41
    eor     BLK3.16b,BLK3.16b,TWK3.16b
    sub     LTMP,LTMP,#32

    ld1     {BLK4.16b}, [IN], #16       // fifth block
    eor     BLK4.16b, BLK4.16b, TWK4.16b
    sub     LTMP,LTMP,#16

    mov     KTMP, KEY
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    sub     TROUNDS,ROUNDS,#2
.align 4
.Lxts_rounds_5blks:
    AesEncrypt1x BLK0,RDK0
    AesEncrypt1x BLK1,RDK0
    AesEncrypt1x BLK2,RDK0
    AesEncrypt1x BLK3,RDK0
    AesEncrypt1x BLK4,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesEncrypt1x BLK0,RDK1
    AesEncrypt1x BLK1,RDK1
    AesEncrypt1x BLK2,RDK1
    AesEncrypt1x BLK3,RDK1
    AesEncrypt1x BLK4,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_rounds_5blks

    AesEncrypt1x BLK0,RDK0
    AesEncrypt1x BLK1,RDK0
    AesEncrypt1x BLK2,RDK0
    AesEncrypt1x BLK3,RDK0
    AesEncrypt1x BLK4,RDK0
    subs    LTMP,LTMP,#80               // flags are consumed by csel and by b.hs further down

    AesEncrypt1x BLK0,RDK1
    AesEncrypt1x BLK1,RDK1
    AesEncrypt1x BLK2,RDK1
    AesEncrypt1x BLK3,RDK1
    AesEncrypt1x BLK4,RDK1

    // last 7 rounds
    AesEncrypt1x BLK0,RDK2
    AesEncrypt1x BLK1,RDK2
    AesEncrypt1x BLK2,RDK2
    AesEncrypt1x BLK3,RDK2
    AesEncrypt1x BLK4,RDK2
    csel    POS,xzr,LTMP,gt             // fewer than 5 blocks left: negative rewind, else 0

    AesEncrypt1x BLK0,RDK3
    AesEncrypt1x BLK1,RDK3
    AesEncrypt1x BLK2,RDK3
    AesEncrypt1x BLK3,RDK3
    AesEncrypt1x BLK4,RDK3
    add     IN,IN,POS                   // rewind so the 5 loads below end at the last whole block

    AesEncrypt1x BLK0,RDK4
    AesEncrypt1x BLK1,RDK4
    AesEncrypt1x BLK2,RDK4
    AesEncrypt1x BLK3,RDK4
    AesEncrypt1x BLK4,RDK4

    AesEncrypt1x BLK0,RDK5
    AesEncrypt1x BLK1,RDK5
    AesEncrypt1x BLK2,RDK5
    AesEncrypt1x BLK3,RDK5
    AesEncrypt1x BLK4,RDK5

    AesEncrypt1x BLK0,RDK6
    AesEncrypt1x BLK1,RDK6
    AesEncrypt1x BLK2,RDK6
    AesEncrypt1x BLK3,RDK6
    AesEncrypt1x BLK4,RDK6

    eor     TMP0.16b,TWK0.16b,RDK8.16b
    aese    BLK0.16b,RDK7.16b           // final round
    NextTweak TWX0,TWX1,TWKD00,TWKD01   // tweaks and loads for the next 5 blocks are done in advance

    eor     TMP1.16b,TWK1.16b,RDK8.16b
    ld1     {IN0.16b}, [IN], #16
    aese    BLK1.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD10,TWKD11

    eor     TMP2.16b,TWK2.16b,RDK8.16b
    ld1     {IN1.16b}, [IN], #16
    aese    BLK2.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD20,TWKD21

    eor     TMP3.16b,TWK3.16b,RDK8.16b
    ld1     {IN2.16b}, [IN], #16
    aese    BLK3.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD30,TWKD31

    eor     TMP4.16b,TWK4.16b,RDK8.16b
    ld1     {IN3.16b}, [IN], #16
    aese    BLK4.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD40,TWKD41

    ld1     {IN4.16b}, [IN], #16
    mov     KTMP, KEY
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    eor     TMP0.16b,TMP0.16b,BLK0.16b
    eor     BLK0.16b,IN0.16b,TWK0.16b   // blk0 = in0 ^ twk0
    eor     TMP1.16b,TMP1.16b,BLK1.16b
    eor     BLK1.16b,IN1.16b,TWK1.16b
    st1     {TMP0.16b}, [OUT], #16
    eor     TMP2.16b,TMP2.16b,BLK2.16b
    eor     BLK2.16b,IN2.16b,TWK2.16b
    eor     TMP3.16b,TMP3.16b,BLK3.16b
    eor     BLK3.16b,IN3.16b,TWK3.16b
    st1     {TMP1.16b}, [OUT], #16
    eor     TMP4.16b,TMP4.16b,BLK4.16b
    eor     BLK4.16b,IN4.16b,TWK4.16b
    st1     {TMP2.16b}, [OUT], #16
    sub     TROUNDS,ROUNDS,#2
    st1     {TMP3.16b,TMP4.16b}, [OUT], #32

    b.hs    .Lxts_rounds_5blks          // flags from subs LTMP above: at least 5 more blocks
    add     LTMP,LTMP,#80               // add 5 blocks length back, LTMP went negative
    cbz     LTMP,.Lxtx_tail_blk
    cmp     LTMP, #16
    b.eq    .Lxts_pre_last_1blks
    cmp     LTMP,#32
    b.eq    .Lxts_pre_last_2blks
    cmp     LTMP,#48
    b.eq    .Lxts_pre_last_3blks
    cmp     LTMP,#64
    b.eq    .Lxts_pre_last_4blks
    // BLK0-BLK4 hold (in_i ^ twk_i) for a window that was rewound, so the blocks
    // still to be processed sit at the end of IN0-IN4. The stubs below swap the
    // right inputs into the leading BLK registers, rewind the running tweak and
    // join the matching round loop (RDK0, RDK1, KTMP and TROUNDS are already set).
.Lxts_pre_last_1blks:
    eor     IN0.16b,IN0.16b,IN4.16b     // in0 = in0 ^ in4
    eor     BLK0.16b,BLK0.16b,IN0.16b   // blk0 = in0 ^ twk0 ^ in0 ^ in4
    fmov    TWX0,TWKD00                 // reset already computed tweak
    fmov    TWX1,TWKD01
    b       .Lxts_rounds_1blks
.Lxts_pre_last_2blks:
    eor     BLK0.16b,BLK0.16b,IN0.16b
    eor     BLK1.16b,BLK1.16b,IN1.16b
    eor     BLK0.16b,BLK0.16b,IN3.16b   // in3 -> blk0
    eor     BLK1.16b,BLK1.16b,IN4.16b   // in4 -> blk1
    fmov    TWX0,TWKD10                 // reset already computed tweak
    fmov    TWX1,TWKD11
    b       .Lxts_rounds_2blks
.Lxts_pre_last_3blks:
    eor     BLK0.16b,BLK0.16b,IN0.16b
    eor     BLK1.16b,BLK1.16b,IN1.16b
    eor     BLK2.16b,BLK2.16b,IN2.16b
    eor     BLK0.16b,BLK0.16b,IN2.16b   // in2 -> blk0
    eor     BLK1.16b,BLK1.16b,IN3.16b   // in3 -> blk1
    eor     BLK2.16b,BLK2.16b,IN4.16b   // in4 -> blk2
    fmov    TWX0,TWKD20                 // reset already computed tweak
    fmov    TWX1,TWKD21
    b       .Lxts_rounds_3blks
.Lxts_pre_last_4blks:
    eor     BLK0.16b,BLK0.16b,IN0.16b
    eor     BLK1.16b,BLK1.16b,IN1.16b
    eor     BLK2.16b,BLK2.16b,IN2.16b
    eor     BLK3.16b,BLK3.16b,IN3.16b
    sub     IN,IN,#16                   // 4 blocks were loaded but only 3 are processed here, step back 1 block
    eor     BLK0.16b,BLK0.16b,IN1.16b   // in1 -> blk0
    eor     BLK1.16b,BLK1.16b,IN2.16b   // in2 -> blk1
    eor     BLK2.16b,BLK2.16b,IN3.16b   // in3 -> blk2
    eor     BLK3.16b,BLK3.16b,IN4.16b   // in4 -> blk3
    fmov    TWX0,TWKD20                 // reset already computed tweak
    fmov    TWX1,TWKD21
    b       .Lxts_rounds_3blks

.Lxts_aesenc_finish:
    MOV_REG_TO_VEC(TWX0,TWX1,TWKD00,TWKD01)
    st1     {TWK0.16b}, [TWEAK]         // hand the advanced tweak back to the caller

    mov     x0, #0                      // always report success

    ldp     d14, d15, [sp,#64]
    ldp     d12, d13, [sp, #48]
    ldp     d10, d11, [sp, #32]
    ldp     d8, d9, [sp, #16]
    ldp     x29, x30, [sp], #80

AARCH64_AUTIASP
    ret
.size CRYPT_AES_XTS_Encrypt, .-CRYPT_AES_XTS_Encrypt


/**
 * int32_t CRYPT_AES_XTS_Decrypt(const CRYPT_AES_Key *ctx, const uint8_t *in, uint8_t *out, uint32_t len, const uint8_t *t);
 *
 * AES-XTS decryption of len bytes from in to out.
 *   x0 = ctx    key context; round keys start at offset 0, round count is read from offset 240
 *   x1 = in     input
 *   x2 = out    output
 *   x3 = len    length in bytes
 *   x4 = t      16-byte tweak; the advanced tweak is written back here before returning
 * Always returns 0 in x0.
 * When len % 16 != 0 the last full block is kept out of the bulk processing: it is
 * decrypted with the tweak that follows the current one, and the block rebuilt from
 * the tail is then decrypted with the current (saved) tweak.
 * NOTE(review): len is documented as uint32_t but all 64 bits of x3 are used; this
 * presumes the caller passes it zero-extended - confirm.
 */
.globl CRYPT_AES_XTS_Decrypt
.type CRYPT_AES_XTS_Decrypt, %function
.align 4
CRYPT_AES_XTS_Decrypt:
AARCH64_PACIASP
    stp     x29, x30, [sp,#-80]!
    add     x29, sp, #0
    stp     d8, d9, [sp,#16]            // d8-d15 are callee-saved
    stp     d10, d11, [sp,#32]
    stp     d12, d13, [sp,#48]
    stp     d14, d15, [sp,#64]

    ld1     {TWK0.16b}, [TWEAK]
    and     LTMP, LEN, #-16
    ands    TAILNUM, LEN, #0xF          // tail length, LEN % 16
    sub     XTMP1,LTMP,#16              // hold back the last full block when there is a tail
    csel    LTMP,XTMP1,LTMP,ne          // if tailnum != 0, len -= 16

    mov     WTMP0,0x87                  // constant consumed by NextTweak
    ldr     ROUNDS,[KEY,#240]
    fmov    TWX0,TWKD00
    fmov    TWX1,TWKD01

    sub     ROUNDS,ROUNDS,#6            // preload the last 7 round keys into RDK2-RDK8
    add     KTMP,KEY,XROUNDS,lsl#4
    ld1     {RDK2.4s,RDK3.4s},[KTMP],#32
    ld1     {RDK4.4s,RDK5.4s},[KTMP],#32
    ld1     {RDK6.4s,RDK7.4s},[KTMP],#32
    ld1     {RDK8.4s},[KTMP]

    // Dispatch on the number of bulk bytes still pending.
.Lxts_aesdec_start:
    cmp     LTMP, #80
    b.gt    .Lxts_dec_proc_5_blks       // strictly greater: exactly 80 goes through the 3- and 2-block paths
    cmp     LTMP, #48
    b.ge    .Lxts_dec_proc_3_blks
    cmp     LTMP, #32
    b.eq    .Lxts_dec_proc_2_blks
    cmp     LTMP, #16
    b.eq    .Lxts_dec_proc_1blk
    cmp     LTMP, #0
    b.eq    .Lxts_dec_last_secondblk

    // Reached with LTMP negative. When a tail is pending, the held-back full block
    // has already been decrypted to OUT - 16 and TWK1 holds the saved tweak.
.Lxtx_dec_tail_blk:
    fmov    TWX0,TWKD00                 // reset already computed tweak
    fmov    TWX1,TWKD01
    cbz     TAILNUM,.Lxts_aesdec_finish
    // Prepare the decrypt tail block: the leading TAILNUM bytes of the previous
    // output block become the short last output block and are replaced by the
    // input tail.
    sub     TMPOUT,OUT,#16
.Lxtx_dec_tail_blk_loop:
    subs    TAILNUM,TAILNUM,1
    ldrb    WC,[TMPOUT,TAILNUM]
    ldrb    WP,[IN,TAILNUM]
    strb    WC,[OUT,TAILNUM]
    strb    WP,[TMPOUT,TAILNUM]
    b.gt    .Lxtx_dec_tail_blk_loop
    ld1     {BLK0.16b}, [TMPOUT]
    mov     OUT,TMPOUT                  // the result overwrites the previous output block
    mov     TWK0.16b,TWK1.16b           // load the saved tweak back
    b       .Lxts_dec_proc_1blk_loaded

    // All bulk blocks are done. With a tail pending, decrypt the held-back full
    // block with the tweak after the current one and keep the current one in TWK1.
.Lxts_dec_last_secondblk:
    cbz     TAILNUM,.Lxts_aesdec_finish
    mov     TWK1.16b,TWK0.16b           // save the tweak for the rebuilt tail block
    NextTweak TWX0,TWX1,TWKD00,TWKD01
.Lxts_dec_proc_1blk:
    ld1     {BLK0.16b}, [IN],#16
.Lxts_dec_proc_1blk_loaded:
    mov     KTMP, KEY
    eor     BLK0.16b,BLK0.16b,TWK0.16b
    ld1     {RDK0.4s},[KTMP],#16
    sub     TROUNDS,ROUNDS,#2
    ld1     {RDK1.4s},[KTMP],#16
.Lxts_dec_rounds_1blks:
    AesDecrypt1x BLK0,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesDecrypt1x BLK0,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_dec_rounds_1blks

    AesDecrypt1x BLK0,RDK0
    AesDecrypt1x BLK0,RDK1

    // last 7 rounds
    AesDecrypt1x BLK0,RDK2
    AesDecrypt1x BLK0,RDK3
    AesDecrypt1x BLK0,RDK4
    AesDecrypt1x BLK0,RDK5
    AesDecrypt1x BLK0,RDK6

    aesd    BLK0.16b,RDK7.16b           // final round
    eor     BLK0.16b,BLK0.16b,RDK8.16b
    eor     BLK0.16b,BLK0.16b,TWK0.16b

    st1     {BLK0.16b}, [OUT], #16

    NextTweak TWX0,TWX1,TWKD00,TWKD01

    subs    LTMP,LTMP,#16
    b.lt    .Lxtx_dec_tail_blk          // went negative: this was the held-back or the tail block
    b.hs    .Lxts_aesdec_start

.Lxts_dec_proc_2_blks:
    ld1     {BLK0.16b, BLK1.16b}, [IN], #32
    mov     KTMP, KEY
    NextTweak TWX0,TWX1,TWKD10,TWKD11
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    sub     TROUNDS,ROUNDS,#2
    eor     BLK0.16b, BLK0.16b, TWK0.16b
    eor     BLK1.16b, BLK1.16b, TWK1.16b
.Lxts_dec_rounds_2blks:
    AesDecrypt1x BLK0,RDK0
    AesDecrypt1x BLK1,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesDecrypt1x BLK0,RDK1
    AesDecrypt1x BLK1,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_dec_rounds_2blks

    AesDecrypt1x BLK0,RDK0
    AesDecrypt1x BLK1,RDK0

    AesDecrypt1x BLK0,RDK1
    AesDecrypt1x BLK1,RDK1

    // last 7 rounds
    AesDecrypt1x BLK0,RDK2
    AesDecrypt1x BLK1,RDK2

    AesDecrypt1x BLK0,RDK3
    AesDecrypt1x BLK1,RDK3

    AesDecrypt1x BLK0,RDK4
    AesDecrypt1x BLK1,RDK4

    AesDecrypt1x BLK0,RDK5
    AesDecrypt1x BLK1,RDK5

    AesDecrypt1x BLK0,RDK6
    AesDecrypt1x BLK1,RDK6

    eor     TWK0.16b,TWK0.16b,RDK8.16b  // fold the last round key into the tweaks
    eor     TWK1.16b,TWK1.16b,RDK8.16b

    aesd    BLK0.16b,RDK7.16b           // final round
    aesd    BLK1.16b,RDK7.16b

    eor     BLK0.16b,BLK0.16b,TWK0.16b
    eor     BLK1.16b,BLK1.16b,TWK1.16b

    st1     {BLK0.16b, BLK1.16b}, [OUT], #32
    NextTweak TWX0,TWX1,TWKD00,TWKD01
    subs    LTMP,LTMP,#32
    b.hs    .Lxts_aesdec_start

.Lxts_dec_proc_3_blks:
    ld1     {BLK0.16b}, [IN], #16       // first block
    NextTweak TWX0,TWX1,TWKD10,TWKD11
    eor     BLK0.16b,BLK0.16b,TWK0.16b

    ld1     {BLK1.16b}, [IN], #16       // second block
    NextTweak TWX0,TWX1,TWKD20,TWKD21
    eor     BLK1.16b,BLK1.16b,TWK1.16b

    ld1     {BLK2.16b}, [IN], #16       // third block
    eor     BLK2.16b,BLK2.16b,TWK2.16b

    mov     KTMP, KEY
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    sub     TROUNDS,ROUNDS,#2

.Lxts_dec_rounds_3blks:
    AesDecrypt1x BLK0,RDK0
    AesDecrypt1x BLK1,RDK0
    AesDecrypt1x BLK2,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesDecrypt1x BLK0,RDK1
    AesDecrypt1x BLK1,RDK1
    AesDecrypt1x BLK2,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_dec_rounds_3blks

    AesDecrypt1x BLK0,RDK0
    AesDecrypt1x BLK1,RDK0
    AesDecrypt1x BLK2,RDK0

    AesDecrypt1x BLK0,RDK1
    AesDecrypt1x BLK1,RDK1
    AesDecrypt1x BLK2,RDK1

    // last 7 rounds
    AesDecrypt1x BLK0,RDK2
    AesDecrypt1x BLK1,RDK2
    AesDecrypt1x BLK2,RDK2

    AesDecrypt1x BLK0,RDK3
    AesDecrypt1x BLK1,RDK3
    AesDecrypt1x BLK2,RDK3

    AesDecrypt1x BLK0,RDK4
    AesDecrypt1x BLK1,RDK4
    AesDecrypt1x BLK2,RDK4

    AesDecrypt1x BLK0,RDK5
    AesDecrypt1x BLK1,RDK5
    AesDecrypt1x BLK2,RDK5

    AesDecrypt1x BLK0,RDK6
    AesDecrypt1x BLK1,RDK6
    AesDecrypt1x BLK2,RDK6

    eor     TWK0.16b,TWK0.16b,RDK8.16b  // fold the last round key into the tweaks
    eor     TWK1.16b,TWK1.16b,RDK8.16b
    eor     TWK2.16b,TWK2.16b,RDK8.16b

    aesd    BLK0.16b,RDK7.16b           // final round
    aesd    BLK1.16b,RDK7.16b
    aesd    BLK2.16b,RDK7.16b

    eor     BLK0.16b,BLK0.16b,TWK0.16b
    eor     BLK1.16b,BLK1.16b,TWK1.16b
    eor     BLK2.16b,BLK2.16b,TWK2.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b}, [OUT], #48

    NextTweak TWX0,TWX1,TWKD00,TWKD01

    subs    LTMP,LTMP,#48
    b.hs    .Lxts_aesdec_start

.align 4
.Lxts_dec_proc_5_blks:
    ld1     {BLK0.16b}, [IN], #16       // first block
    NextTweak TWX0,TWX1,TWKD10,TWKD11
    eor     BLK0.16b,BLK0.16b,TWK0.16b

    ld1     {BLK1.16b}, [IN], #16       // second block
    NextTweak TWX0,TWX1,TWKD20,TWKD21
    eor     BLK1.16b,BLK1.16b,TWK1.16b
    sub     LTMP,LTMP,#32

    ld1     {BLK2.16b}, [IN], #16       // third block
    NextTweak TWX0,TWX1,TWKD30,TWKD31
    eor     BLK2.16b,BLK2.16b,TWK2.16b

    ld1     {BLK3.16b}, [IN], #16       // fourth block
    NextTweak TWX0,TWX1,TWKD40,TWKD41
    eor     BLK3.16b,BLK3.16b,TWK3.16b
    sub     LTMP,LTMP,#32

    ld1     {BLK4.16b}, [IN], #16       // fifth block
    eor     BLK4.16b, BLK4.16b, TWK4.16b
    sub     LTMP,LTMP,#16

    mov     KTMP, KEY
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    sub     TROUNDS,ROUNDS,#2
.align 4
.Lxts_dec_rounds_5blks:
    AesDecrypt1x BLK0,RDK0
    AesDecrypt1x BLK1,RDK0
    AesDecrypt1x BLK2,RDK0
    AesDecrypt1x BLK3,RDK0
    AesDecrypt1x BLK4,RDK0
    ld1     {RDK0.4s},[KTMP],#16
    subs    TROUNDS,TROUNDS,#2

    AesDecrypt1x BLK0,RDK1
    AesDecrypt1x BLK1,RDK1
    AesDecrypt1x BLK2,RDK1
    AesDecrypt1x BLK3,RDK1
    AesDecrypt1x BLK4,RDK1
    ld1     {RDK1.4s},[KTMP],#16
    b.gt    .Lxts_dec_rounds_5blks

    AesDecrypt1x BLK0,RDK0
    AesDecrypt1x BLK1,RDK0
    AesDecrypt1x BLK2,RDK0
    AesDecrypt1x BLK3,RDK0
    AesDecrypt1x BLK4,RDK0
    subs    LTMP,LTMP,#80               // flags are consumed by csel and by b.hs further down

    AesDecrypt1x BLK0,RDK1
    AesDecrypt1x BLK1,RDK1
    AesDecrypt1x BLK2,RDK1
    AesDecrypt1x BLK3,RDK1
    AesDecrypt1x BLK4,RDK1

    // last 7 rounds
    AesDecrypt1x BLK0,RDK2
    AesDecrypt1x BLK1,RDK2
    AesDecrypt1x BLK2,RDK2
    AesDecrypt1x BLK3,RDK2
    AesDecrypt1x BLK4,RDK2
    csel    POS,xzr,LTMP,gt             // fewer than 5 blocks left: negative rewind, else 0

    AesDecrypt1x BLK0,RDK3
    AesDecrypt1x BLK1,RDK3
    AesDecrypt1x BLK2,RDK3
    AesDecrypt1x BLK3,RDK3
    AesDecrypt1x BLK4,RDK3
    add     IN,IN,POS                   // rewind so the 5 loads below end at the last bulk block

    AesDecrypt1x BLK0,RDK4
    AesDecrypt1x BLK1,RDK4
    AesDecrypt1x BLK2,RDK4
    AesDecrypt1x BLK3,RDK4
    AesDecrypt1x BLK4,RDK4

    AesDecrypt1x BLK0,RDK5
    AesDecrypt1x BLK1,RDK5
    AesDecrypt1x BLK2,RDK5
    AesDecrypt1x BLK3,RDK5
    AesDecrypt1x BLK4,RDK5

    AesDecrypt1x BLK0,RDK6
    AesDecrypt1x BLK1,RDK6
    AesDecrypt1x BLK2,RDK6
    AesDecrypt1x BLK3,RDK6
    AesDecrypt1x BLK4,RDK6

    eor     TMP0.16b,TWK0.16b,RDK8.16b
    aesd    BLK0.16b,RDK7.16b           // final round
    NextTweak TWX0,TWX1,TWKD00,TWKD01   // tweaks and loads for the next 5 blocks are done in advance

    eor     TMP1.16b,TWK1.16b,RDK8.16b
    ld1     {IN0.16b}, [IN], #16
    aesd    BLK1.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD10,TWKD11

    eor     TMP2.16b,TWK2.16b,RDK8.16b
    ld1     {IN1.16b}, [IN], #16
    aesd    BLK2.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD20,TWKD21

    eor     TMP3.16b,TWK3.16b,RDK8.16b
    ld1     {IN2.16b}, [IN], #16
    aesd    BLK3.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD30,TWKD31

    eor     TMP4.16b,TWK4.16b,RDK8.16b
    ld1     {IN3.16b}, [IN], #16
    aesd    BLK4.16b,RDK7.16b
    NextTweak TWX0,TWX1,TWKD40,TWKD41

    ld1     {IN4.16b}, [IN], #16
    mov     KTMP, KEY
    ld1     {RDK0.4s,RDK1.4s},[KTMP],#32
    eor     TMP0.16b,TMP0.16b,BLK0.16b
    eor     BLK0.16b,IN0.16b,TWK0.16b   // blk0 = in0 ^ twk0
    eor     TMP1.16b,TMP1.16b,BLK1.16b
    eor     BLK1.16b,IN1.16b,TWK1.16b
    st1     {TMP0.16b}, [OUT], #16
    eor     TMP2.16b,TMP2.16b,BLK2.16b
    eor     BLK2.16b,IN2.16b,TWK2.16b
    eor     TMP3.16b,TMP3.16b,BLK3.16b
    eor     BLK3.16b,IN3.16b,TWK3.16b
    st1     {TMP1.16b}, [OUT], #16
    eor     TMP4.16b,TMP4.16b,BLK4.16b
    eor     BLK4.16b,IN4.16b,TWK4.16b
    st1     {TMP2.16b}, [OUT], #16
    sub     TROUNDS,ROUNDS,#2
    st1     {TMP3.16b,TMP4.16b}, [OUT], #32

    b.hs    .Lxts_dec_rounds_5blks      // flags from subs LTMP above: at least 5 more blocks
    add     LTMP,LTMP,#80               // add 5 blocks length back, LTMP went negative
    // Nothing left in the bulk part. The held-back full block (if any) has not
    // been decrypted yet, so this must not jump straight to the tail code.
    cbz     LTMP,.Lxts_dec_5blks_drained
    cmp     LTMP, #16
    b.eq    .Lxts_dec_pre_last_1blks
    cmp     LTMP,#32
    b.eq    .Lxts_dec_pre_last_2blks
    cmp     LTMP,#48
    b.eq    .Lxts_dec_pre_last_3blks
    cmp     LTMP,#64
    b.eq    .Lxts_dec_pre_last_4blks
    // BLK0-BLK4 hold (in_i ^ twk_i) for a window that was rewound, so the blocks
    // still to be processed sit at the end of IN0-IN4. The stubs below swap the
    // right inputs into the leading BLK registers, rewind the running tweak and
    // join the matching round loop (RDK0, RDK1, KTMP and TROUNDS are already set).
.Lxts_dec_pre_last_1blks:
    eor     IN0.16b,IN0.16b,IN4.16b     // in0 = in0 ^ in4
    eor     BLK0.16b,BLK0.16b,IN0.16b   // blk0 = in0 ^ twk0 ^ in0 ^ in4
    fmov    TWX0,TWKD00                 // reset already computed tweak
    fmov    TWX1,TWKD01
    b       .Lxts_dec_rounds_1blks
.Lxts_dec_pre_last_2blks:
    eor     BLK0.16b,BLK0.16b,IN0.16b
    eor     BLK1.16b,BLK1.16b,IN1.16b
    eor     BLK0.16b,BLK0.16b,IN3.16b   // in3 -> blk0
    eor     BLK1.16b,BLK1.16b,IN4.16b   // in4 -> blk1
    fmov    TWX0,TWKD10                 // reset already computed tweak
    fmov    TWX1,TWKD11
    b       .Lxts_dec_rounds_2blks
.Lxts_dec_pre_last_3blks:
    eor     BLK0.16b,BLK0.16b,IN0.16b
    eor     BLK1.16b,BLK1.16b,IN1.16b
    eor     BLK2.16b,BLK2.16b,IN2.16b
    eor     BLK0.16b,BLK0.16b,IN2.16b   // in2 -> blk0
    eor     BLK1.16b,BLK1.16b,IN3.16b   // in3 -> blk1
    eor     BLK2.16b,BLK2.16b,IN4.16b   // in4 -> blk2
    fmov    TWX0,TWKD20                 // reset already computed tweak
    fmov    TWX1,TWKD21
    b       .Lxts_dec_rounds_3blks
.Lxts_dec_pre_last_4blks:
    eor     BLK0.16b,BLK0.16b,IN0.16b
    eor     BLK1.16b,BLK1.16b,IN1.16b
    eor     BLK2.16b,BLK2.16b,IN2.16b
    eor     BLK3.16b,BLK3.16b,IN3.16b
    sub     IN,IN,#16                   // 4 blocks were loaded but only 3 are processed here, step back 1 block
    eor     BLK0.16b,BLK0.16b,IN1.16b   // in1 -> blk0
    eor     BLK1.16b,BLK1.16b,IN2.16b   // in2 -> blk1
    eor     BLK2.16b,BLK2.16b,IN3.16b   // in3 -> blk2
    eor     BLK3.16b,BLK3.16b,IN4.16b   // in4 -> blk3
    fmov    TWX0,TWKD20                 // reset already computed tweak
    fmov    TWX1,TWKD21
    b       .Lxts_dec_rounds_3blks

    // The 5-block pipeline consumed the whole bulk part. Rewind the running tweak
    // to the first unused one and take the same path as a dispatch with LTMP == 0:
    // finish when there is no tail, otherwise decrypt the held-back block first.
.Lxts_dec_5blks_drained:
    fmov    TWX0,TWKD00                 // reset already computed tweak
    fmov    TWX1,TWKD01
    b       .Lxts_dec_last_secondblk

.Lxts_aesdec_finish:
    MOV_REG_TO_VEC(TWX0,TWX1,TWKD00,TWKD01)
    st1     {TWK0.16b}, [TWEAK]         // hand the advanced tweak back to the caller

    mov     x0, #0                      // always report success

    ldp     d14, d15, [sp,#64]
    ldp     d12, d13, [sp, #48]
    ldp     d10, d11, [sp, #32]
    ldp     d8, d9, [sp, #16]
    ldp     x29, x30, [sp], #80
AARCH64_AUTIASP
    ret
.size CRYPT_AES_XTS_Decrypt, .-CRYPT_AES_XTS_Decrypt

#endif