/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

/*
 * Shared register aliases and assembler macros for AES-GCM on AArch64.
 * Syntax: GNU as (AArch64), preprocessed by the C preprocessor.
 * Instructions used: AESE/AESMC (AES rounds) and PMULL/PMULL2 (GHASH).
 *
 * This file only defines macros and aliases; it emits no code by itself.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)

.text

/*
 * General-purpose register aliases.
 *
 * Aliasing to be aware of:
 *   - IVCTR (w4) is the 32-bit view of KEY00 (x4).
 *   - IV_C / IV_W are the 64/32-bit views of x12 (running 32-bit counter).
 *   - IV_CX / IV_CW are the 64/32-bit views of x9 (scratch used to build the
 *     upper half of a counter block).
 *   - COUNT / COUNTW are the 64/32-bit views of x15.
 *
 * x6, x7 and x19-x24 are used by the macros below as unnamed temporaries.
 */
INPUT   .req    x1
OUT00   .req    x2
INLEN   .req    x3
KEY00   .req    x4
IVCTR   .req    w4
HTABLE  .req    x5
IVEC0   .req    x0
ROUNDS  .req    w8
COUNT   .req    x15
COUNTW  .req    w15
IV_H    .req    x10     // high 64 bits
IV_L    .req    x11     // lower 64 bits
IV_C    .req    x12
IV_W    .req    w12
IV_CW   .req    w9
IV_CX   .req    x9

/* Vector register aliases: counter blocks and data blocks. */
CTR0    .req    v0
CTR1    .req    v1
CTR2    .req    v2
CTR3    .req    v3
OUT0    .req    v4
OUT1    .req    v5
OUT2    .req    v6
OUT3    .req    v7

/* Vector register aliases: AES round keys. */
KEY0    .req    v18
KEY1    .req    v19
KEY2    .req    v20
KEY3    .req    v21
KEY4    .req    v22
KEY5    .req    v23
KEY6    .req    v24
KEY7    .req    v25
KEY8    .req    v26
KEY9    .req    v27
KEY10   .req    v28
KEY11   .req    v29
KEY12   .req    v30
KEY13   .req    v31

/* Round key kept in general-purpose registers (loaded by BEFORE16_ROUND). */
KEND0   .req    x13
KEND1   .req    x14

/*
 * Vector register aliases: GHASH state and table entries.
 * MULL_C2 names the same register as HASH2 (v13) and HASH1_2 names the same
 * register as HASH1 (v12).
 */
HASH0   .req    v11
HASH1   .req    v12
HASH2   .req    v13
HASH3   .req    v14
HASH4   .req    v15
MULL_C2 .req    v13
HASH1_2 .req    v12

/*
 * IN_STP - save registers on the stack.
 *
 * Allocates a 112-byte frame (pre-indexed store) and saves x19-x24 and
 * d8-d15 into it:
 *   [sp+0]  x19,x20   [sp+16] x21,x22   [sp+32] x23,x24
 *   [sp+48] d8,d9     [sp+64] d10,d11   [sp+80] d12,d13   [sp+96] d14,d15
 * Must be paired with OUT_STP.
 */
.macro IN_STP
    stp     x19, x20, [sp, #-112]!
    stp     x21, x22, [sp, #16]
    stp     x23, x24, [sp, #32]
    stp     d8, d9, [sp, #48]
    stp     d10, d11, [sp, #64]
    stp     d12, d13, [sp, #80]
    stp     d14, d15, [sp, #96]
.endm

/*
 * OUT_STP - restore the registers saved by IN_STP and release the 112-byte
 * frame (the final load is post-indexed and pops the frame).
 */
.macro OUT_STP
    ldp     x21, x22, [sp, #16]
    ldp     x23, x24, [sp, #32]
    ldp     d8, d9, [sp, #48]
    ldp     d10, d11, [sp, #64]
    ldp     d12, d13, [sp, #80]
    ldp     d14, d15, [sp, #96]
    ldp     x19, x20, [sp], #112
.endm

/*
 * REV_2S REG0, REG1 - byte-reverse two general-purpose registers in place.
 */
.macro REV_2S REG0, REG1
    rev     \REG0, \REG0
    rev     \REG1, \REG1
.endm

/*
 * LOAD_KEY - load ten 16-byte round keys into KEY0..KEY9.
 *
 * In:      KEY00 = address of the first round key.
 * Out:     KEY0..KEY9 loaded; KEY00 advanced by 160 bytes.
 */
.macro LOAD_KEY
    ld1     {KEY0.4s, KEY1.4s}, [KEY00], #32    // load key-0-1
    ld1     {KEY2.4s, KEY3.4s}, [KEY00], #32    // load key-2-3
    ld1     {KEY4.4s, KEY5.4s}, [KEY00], #32    // load key-4-5
    ld1     {KEY6.4s, KEY7.4s}, [KEY00], #32    // load key-6-7
    ld1     {KEY8.4s, KEY9.4s}, [KEY00], #32    // load key-8-9
.endm

/*
 * LOAD_GHASH_TABLE - load the GHASH accumulator and four table entries.
 *
 * In:      HTABLE = table base.
 * Out:     HASH0 <- [base+0], HASH1 <- [base+16], HASH2 <- [base+48],
 *          HASH3 <- [base+64], HASH4 <- [base+96]; HTABLE = base + 96.
 *
 * The 16-byte slots at base+32 and base+80 are skipped.
 * NOTE(review): presumably those slots hold precomputed Karatsuba middle
 * terms that this code recomputes instead - confirm against the table setup.
 */
.macro LOAD_GHASH_TABLE
    ld1     {HASH0.16b}, [HTABLE], #16          // load ghash
    ld1     {HASH1.2d}, [HTABLE], #16           // load h^1
    add     HTABLE, HTABLE, #16                 // skip one 16-byte slot
    ld1     {HASH2.2d}, [HTABLE], #16           // load h^2
    ld1     {HASH3.2d}, [HTABLE], #16           // load h^3
    add     HTABLE, HTABLE, #16                 // skip one 16-byte slot
    ld1     {HASH4.2d}, [HTABLE]                // load h^4
.endm

/*
 * ROUND4 BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY - apply AESE followed by AESMC
 * with the same round key to four blocks.
 */
.macro ROUND4 BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY
    aese    \BLOCK0, \KEY
    aesmc   \BLOCK0, \BLOCK0
    aese    \BLOCK1, \KEY
    aesmc   \BLOCK1, \BLOCK1
    aese    \BLOCK2, \KEY
    aesmc   \BLOCK2, \BLOCK2
    aese    \BLOCK3, \KEY
    aesmc   \BLOCK3, \BLOCK3
.endm

/*
 * ROUND4_END BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY - apply AESE only (no
 * AESMC) with the same round key to four blocks.
 */
.macro ROUND4_END BLOCK0, BLOCK1, BLOCK2, BLOCK3, KEY
    aese    \BLOCK0, \KEY
    aese    \BLOCK1, \KEY
    aese    \BLOCK2, \KEY
    aese    \BLOCK3, \KEY
.endm

/*
 * ROUND BLOCK, KEY - apply AESE followed by AESMC to a single block.
 */
.macro ROUND BLOCK, KEY
    aese    \BLOCK, \KEY
    aesmc   \BLOCK, \BLOCK
.endm

/*
 * LOAD_CTR DI, VI - build one counter block and advance the counter.
 *
 * DI is the D view of the destination vector, VI is its upper lane
 * (for example: LOAD_CTR d1, CTR1.d[1]).
 *
 * In:      IV_H, IV_L, IV_W.
 * Out:     low lane  = IV_H
 *          high lane = IV_L | (byte-reversed IV_W << 32)
 *          IV_W incremented by one.
 * Clobber: x9 (IV_CX).
 */
.macro LOAD_CTR DI, VI
    rev     IV_CW, IV_W                         // counter back to memory byte order
    fmov    \DI, IV_H                           // set h64
    orr     IV_CX, IV_L, IV_CX, lsl #32         // counter into bits 63:32
    add     IV_W, IV_W, #1                      // CTR++
    fmov    \VI, IV_CX                          // set l64
.endm

/*
 * BEFORE_ROUND - set-up before the four-block rounds.
 *
 * - Swaps the 64-bit halves of HASH0..HASH4, then byte-reverses each half of
 *   HASH0.
 * - Byte-reverses IV_W, adds it into IVCTR (w4), then increments IV_W.
 * - Clears bits 63:32 of x11 (IV_L) by writing w11.
 * - Packs table halves: v17 = h4l|h3l, v9 = h4h|h3h, v16 = h2l|h1l,
 *   v8 = h2h|h1h.
 * - Builds CTR1, CTR2 and CTR3 with LOAD_CTR (CTR0 is not built here).
 *
 * Clobber: x9, v8, v9, v16, v17.
 * NOTE(review): IVCTR is the low half of KEY00 (x4), so writing it destroys
 * the key pointer; presumably the caller has repurposed x4 before this
 * point - confirm.
 */
.macro BEFORE_ROUND
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // xi
    ext     HASH1.16b, HASH1.16b, HASH1.16b, #8 // h^1
    rev     IV_W, IV_W                          // rev_ctr32
    ext     HASH2.16b, HASH2.16b, HASH2.16b, #8 // h^2
    ext     HASH3.16b, HASH3.16b, HASH3.16b, #8 // h^3
    add     IVCTR, IV_W, IVCTR                  // w4 = w4 + counter
    ext     HASH4.16b, HASH4.16b, HASH4.16b, #8 // h^4
    add     IV_W, IV_W, #1                      // ctr++
    rev64   HASH0.16b, HASH0.16b                // byte-reverse each 64-bit half of xi
    orr     w11, w11, w11                       // W write zeroes bits 63:32 of x11
    trn2    v17.2d, HASH3.2d, HASH4.2d          // h4l | h3l
    LOAD_CTR d1, CTR1.d[1]                      // CTR block 1
    trn1    v9.2d, HASH3.2d, HASH4.2d           // h4h | h3h
    LOAD_CTR d2, CTR2.d[1]                      // CTR block 2
    trn2    v16.2d, HASH1.2d, HASH2.2d          // h2l | h1l
    LOAD_CTR d3, CTR3.d[1]                      // CTR block 3
    trn1    v8.2d, HASH1.2d, HASH2.2d           // h2h | h1h
.endm

/*
 * FIRST_ROUND - AES rounds 0..6 on CTR0..CTR3, interleaved with loading 64
 * bytes of input.
 *
 * In:      INPUT (not advanced here), KEY0..KEY6, KEND0/KEND1.
 * Out:     CTR0..CTR3 advanced by seven rounds;
 *          OUT0..OUT3 = input blocks 0..3, each XORed with KEND0 (low half)
 *          and KEND1 (high half).
 * Clobber: x6, x7, x19-x24.
 *
 * NOTE(review): folding KEND into the data presumably stands in for the
 * final AddRoundKey, so a later XOR with the AESE output gives the result
 * directly - confirm against the callers.
 */
.macro FIRST_ROUND
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b    // round 0
    ldp     x6, x7, [INPUT, #0]                 // AES[0] - load input
#ifdef HITLS_BIG_ENDIAN
    REV_2S x6, x7
#endif
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b    // round 1
    ldp     x19, x20, [INPUT, #16]              // AES[1] - load input
#ifdef HITLS_BIG_ENDIAN
    REV_2S x19, x20
#endif
    eor     x6, x6, KEND0                       // AES[0] - fold in KEND low
    eor     x7, x7, KEND1                       // AES[0] - fold in KEND high
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b    // round 2
    ldp     x21, x22, [INPUT, #32]              // AES[2] - load input
#ifdef HITLS_BIG_ENDIAN
    REV_2S x21, x22
#endif
    eor     x19, x19, KEND0                     // AES[1] - fold in KEND low
    eor     x20, x20, KEND1                     // AES[1] - fold in KEND high
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b    // round 3
    ldp     x23, x24, [INPUT, #48]              // AES[3] - load input
#ifdef HITLS_BIG_ENDIAN
    REV_2S x23, x24
#endif
    eor     x21, x21, KEND0                     // AES[2] - fold in KEND low
    eor     x22, x22, KEND1                     // AES[2] - fold in KEND high
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b    // round 4
    eor     x23, x23, KEND0                     // AES[3] - fold in KEND low
    eor     x24, x24, KEND1                     // AES[3] - fold in KEND high
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b    // round 5
    fmov    d4, x6                              // AES[0] - mov low
    fmov    d5, x19                             // AES[1] - mov low
    fmov    d6, x21                             // AES[2] - mov low
    fmov    d7, x23                             // AES[3] - mov low
    ROUND4 CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b    // round 6
    fmov    OUT0.d[1], x7                       // AES[0] - mov high
    fmov    OUT1.d[1], x20                      // AES[1] - mov high
    fmov    OUT2.d[1], x22                      // AES[2] - mov high
    fmov    OUT3.d[1], x24                      // AES[3] - mov high
.endm

/*
 * STORE_RESULT - finish and store four blocks, then rebuild counter blocks.
 *
 * In:      OUT0..OUT3 and CTR0..CTR3 as left by the AES rounds;
 *          x9 = prepared upper half for the next CTR0; IV_H (x10), IV_L
 *          (x11), IV_W.
 * Out:     OUTn ^= CTRn, stored as 64 bytes at OUT00 (OUT00 += 64);
 *          INPUT += 64; COUNT -= 1 with the condition flags set by that
 *          subtraction; CTR0, CTR1, CTR2 rebuilt; x9 = upper half for CTR3
 *          (CTR3 itself is not written and IV_W is not advanced for it).
 *          IV_W is advanced by two in total.
 */
.macro STORE_RESULT
    add     INPUT, INPUT, #64                   // AES input_ptr update
    eor     OUT0.16b, OUT0.16b, CTR0.16b        // AES[0] - result
    eor     OUT1.16b, OUT1.16b, CTR1.16b        // AES[1] - result
    eor     OUT2.16b, OUT2.16b, CTR2.16b        // AES[2] - result
    fmov    d0, x10                             // CTR[0]
    eor     OUT3.16b, OUT3.16b, CTR3.16b        // AES[3] - result
    subs    COUNT, COUNT, #1                    // count--
    fmov    CTR0.d[1], x9                       // CTR[0]--OK
    rev     w9, IV_W                            // CTR[1]--Start
    st1     {OUT0.16b}, [OUT00], #16            // AES[0] - store result
    orr     x9, x11, x9, lsl #32                // CTR[1]
    st1     {OUT1.16b}, [OUT00], #16            // AES[1] - store result
    add     IV_W, IV_W, #1                      // CTR++
    fmov    d1, x10                             // CTR[1]
    st1     {OUT2.16b}, [OUT00], #16            // AES[2] - store result
    fmov    v1.d[1], x9                         // CTR[1]--OK
    rev     w9, IV_W                            // CTR[2]--Start
    st1     {OUT3.16b}, [OUT00], #16            // AES[3] - store result
    orr     x9, x11, x9, lsl #32                // CTR[2]
    add     IV_W, IV_W, #1                      // CTR++
    fmov    d2, x10                             // CTR[2]
    fmov    v2.d[1], x9                         // CTR[2]--OK
    rev     w9, IV_W                            // CTR[3]--Start
    orr     x9, x11, x9, lsl #32                // CTR[3] upper half left in x9
.endm

/*
 * STORE_DEC_RESULT - decrypt-side first half of a four-block step.
 *
 * In:      INPUT, CTR0..CTR2 as left by the AES rounds, KEND0/KEND1,
 *          IV_H (x10), IV_L (x11), IV_W.
 * Out:     OUT0..OUT3 = four input blocks (INPUT += 64);
 *          blocks 0 and 1 finished (CTRn ^ OUTn ^ KEND) and stored at OUT00
 *          (OUT00 += 32); CTR2 = CTR2 ^ OUT2 (completed by GHASH_DEC_BLOCK);
 *          CTR0 and CTR1 rebuilt; x9 = upper half for CTR2; IV_W advanced
 *          by three; OUT0 and OUT1 byte-reversed per 64-bit half;
 *          COUNT -= 1 with the condition flags set by that subtraction.
 * Clobber: x6, x7, x19, x20.
 */
.macro STORE_DEC_RESULT
    ld1     {OUT0.16b}, [INPUT], #16
    ld1     {OUT1.16b}, [INPUT], #16
    ld1     {OUT2.16b}, [INPUT], #16
    eor     CTR0.16b, CTR0.16b, OUT0.16b
    ld1     {OUT3.16b}, [INPUT], #16
    eor     CTR1.16b, CTR1.16b, OUT1.16b
    eor     CTR2.16b, CTR2.16b, OUT2.16b
    mov     x6, CTR0.d[0]
    mov     x7, CTR0.d[1]
    mov     x19, CTR1.d[0]
    mov     x20, CTR1.d[1]
#ifdef HITLS_BIG_ENDIAN
    REV_2S x6, x7
    REV_2S x19, x20
#endif
    rev     w9, IV_W                            // CTR[0]
    eor     x6, x6, KEND0
    orr     x9, x11, x9, lsl #32                // CTR[0]
    eor     x7, x7, KEND1
    add     IV_W, IV_W, #1                      // CTR++
    fmov    d0, x10                             // CTR[0]
    eor     x19, x19, KEND0
    fmov    CTR0.d[1], x9                       // CTR[0]--OK
    rev     w9, IV_W                            // CTR[1]
    eor     x20, x20, KEND1
    orr     x9, x11, x9, lsl #32                // CTR[1]
    subs    COUNT, COUNT, #1                    // count--
    add     IV_W, IV_W, #1                      // CTR++
    fmov    d1, x10                             // CTR[1]
    stp     x6, x7, [OUT00], #16                // AES[0] - store result
    fmov    v1.d[1], x9                         // CTR[1]--OK
    stp     x19, x20, [OUT00], #16              // AES[1] - store result
    rev     w9, IV_W                            // CTR[2]
    rev64   OUT0.16b, OUT0.16b                  // GHASH block 4k
    add     IV_W, IV_W, #1                      // CTR++
    rev64   OUT1.16b, OUT1.16b                  // GHASH block 4k+1
    orr     x9, x11, x9, lsl #32                // CTR[2] upper half left in x9
.endm

/*
 * GHASH_BLOCK - fold four blocks (OUT0..OUT3) into the GHASH state HASH0.
 *
 * Multiplies OUT0 ^ HASH0 by HASH4, OUT1 by HASH3, OUT2 by HASH2 and OUT3 by
 * HASH1 using Karatsuba (low, high and middle products), accumulates them
 * and reduces the result with the constant 0xc2 << 56.
 *
 * In:      OUT0..OUT3, HASH0..HASH4, v16 and v17 (packed middle operands).
 * Out:     HASH0 = updated GHASH state.
 * Clobber: OUT0, v8, v9, v10, v28, v29, v30, v31.
 *          v28-v31 are the KEY10..KEY13 aliases, so those round keys do not
 *          survive this macro.
 *
 * The instruction order interleaves independent multiply chains; keep it
 * unchanged.
 */
.macro GHASH_BLOCK
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0
    mov     d30, OUT1.d[1]                      // GHASH block 4k+1 - mid
    mov     d31, OUT2.d[1]                      // GHASH block 4k+2 - mid
    eor     OUT0.16b, OUT0.16b, HASH0.16b       // PRE 1 tag ^ out
    pmull2  v28.1q, OUT1.2d, HASH3.2d           // GHASH block 4k+1 - high
    eor     v30.8b, v30.8b, OUT1.8b             // GHASH block 4k+1 - mid
    eor     v31.8b, v31.8b, OUT2.8b             // GHASH block 4k+2 - mid
    mov     d8, OUT0.d[1]                       // GHASH block 4k - mid
    mov     d10, v17.d[1]                       // GHASH block 4k - mid
    pmull2  v9.1q, OUT0.2d, HASH4.2d            // GHASH block 4k - high
    pmull   HASH0.1q, OUT0.1d, HASH4.1d         // GHASH block 4k - low
    eor     v8.8b, v8.8b, OUT0.8b               // GHASH block 4k - mid
    eor     v9.16b, v9.16b, v28.16b             // GHASH block 4k+1 - high
    pmull   v29.1q, OUT1.1d, HASH3.1d           // GHASH block 4k+1 - low
    pmull   v28.1q, OUT2.1d, HASH2.1d           // GHASH block 4k+2 - low
    pmull   v10.1q, v8.1d, v10.1d               // GHASH block 4k - mid
    pmull   v30.1q, v30.1d, v17.1d              // GHASH block 4k+1 - mid
    ins     v31.d[1], v31.d[0]                  // GHASH block 4k+2 - mid
    pmull2  v8.1q, OUT2.2d, HASH2.2d            // GHASH block 4k+2 - high
    eor     v10.16b, v10.16b, v30.16b           // GHASH block 4k+1 - mid
    mov     d30, OUT3.d[1]                      // GHASH block 4k+3 - mid
    eor     HASH0.16b, HASH0.16b, v29.16b       // GHASH block 4k+1 - low
    eor     v30.8b, v30.8b, OUT3.8b             // GHASH block 4k+3 - mid
    pmull2  OUT0.1q, OUT3.2d, HASH1.2d          // GHASH block 4k+3 - high
    eor     v9.16b, v9.16b, v8.16b              // GHASH block 4k+2 - high
    pmull2  v31.1q, v31.2d, v16.2d              // GHASH block 4k+2 - mid
    pmull   v29.1q, OUT3.1d, HASH1.1d           // GHASH block 4k+3 - low
    movi    v8.8b, #0xc2
    pmull   v30.1q, v30.1d, v16.1d              // GHASH block 4k+3 - mid
    eor     HASH0.16b, HASH0.16b, v28.16b       // GHASH block 4k+2 - low
    shl     d8, d8, #56                         // mod_constant
    eor     v9.16b, v9.16b, OUT0.16b            // GHASH block 4k+3 - high
    eor     v10.16b, v10.16b, v31.16b           // GHASH block 4k+2 - mid
    pmull   v31.1q, v9.1d, v8.1d                // MODULO - top 64b align with mid
    eor     HASH0.16b, HASH0.16b, v29.16b       // GHASH block 4k+3 - low
    eor     v10.16b, v10.16b, v30.16b           // GHASH block 4k+3 - mid
    eor     v30.16b, HASH0.16b, v9.16b          // MODULO - karatsuba tidy up
    ext     v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
    eor     v10.16b, v10.16b, v30.16b           // MODULO - karatsuba tidy up
    eor     v10.16b, v10.16b, v31.16b           // MODULO - fold into mid
    eor     v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
    pmull   v9.1q, v10.1d, v8.1d                // MODULO - mid 64b align with low
    ext     v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
    eor     HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
    eor     HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
.endm

/*
 * GHASH_DEC_BLOCK - decrypt-side second half of a four-block step.
 *
 * Folds the four input blocks held in v4..v7 (OUT0..OUT3) into HASH0 and,
 * interleaved with that, finishes and stores output blocks 2 and 3.
 *
 * In:      v4, v5 already byte-reversed per 64-bit half (done in
 *          STORE_DEC_RESULT); v6, v7 raw input blocks 2 and 3 (reversed
 *          here); v2 = CTR2 ^ block 2; CTR3 = AES state for block 3;
 *          KEND0/KEND1; HASH0..HASH4; v16, v17.
 * Out:     HASH0 = updated GHASH state; 32 bytes stored at OUT00
 *          (OUT00 += 32).
 * Clobber: x21-x24, v3 (CTR3), v4-v10.
 *
 * The instruction order interleaves independent chains; keep it unchanged.
 */
.macro GHASH_DEC_BLOCK
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0
    mov     x21, v2.d[0]                        // AES[2] block - mov low
    mov     x22, v2.d[1]                        // AES[2] block - mov high
    rev64   v6.16b, v6.16b                      // GHASH block 4k+2
#ifdef HITLS_BIG_ENDIAN
    REV_2S x21, x22
#endif
    eor     v4.16b, v4.16b, HASH0.16b           // PRE 1
    eor     CTR3.16b, OUT3.16b, CTR3.16b        // AES[3] block - result
    eor     x21, x21, KEND0                     // AES[2] - fold in KEND low
    eor     x22, x22, KEND1                     // AES[2] - fold in KEND high
    pmull2  v9.1q, v4.2d, HASH4.2d              // GHASH block 4k - high
    mov     d8, v4.d[1]                         // GHASH block 4k - mid
    mov     d10, v17.d[1]                       // GHASH block 4k - mid
    mov     x24, CTR3.d[1]                      // AES[3] block - mov high
    pmull   HASH0.1q, v4.1d, HASH4.1d           // GHASH block 4k - low
    eor     v8.8b, v8.8b, v4.8b                 // GHASH block 4k - mid
    pmull2  v4.1q, v5.2d, HASH3.2d              // GHASH block 4k+1 - high
    mov     x23, CTR3.d[0]                      // AES[3] block - mov low
    rev64   v7.16b, v7.16b                      // GHASH block 4k+3
#ifdef HITLS_BIG_ENDIAN
    REV_2S x24, x23
#endif
    pmull   v10.1q, v8.1d, v10.1d               // GHASH block 4k - mid
    eor     x23, x23, KEND0                     // AES[3] - fold in KEND low
    pmull   v8.1q, v5.1d, HASH3.1d              // GHASH block 4k+1 - low
    eor     x24, x24, KEND1                     // AES[3] - fold in KEND high
    eor     v9.16b, v9.16b, v4.16b              // GHASH block 4k+1 - high
    mov     d4, v5.d[1]                         // GHASH block 4k+1 - mid
    eor     HASH0.16b, HASH0.16b, v8.16b        // GHASH block 4k+1 - low
    mov     d8, v6.d[1]                         // GHASH block 4k+2 - mid
    eor     v4.8b, v4.8b, v5.8b                 // GHASH block 4k+1 - mid
    pmull   v5.1q, v6.1d, HASH2.1d              // GHASH block 4k+2 - low
    eor     v8.8b, v8.8b, v6.8b                 // GHASH block 4k+2 - mid
    eor     HASH0.16b, HASH0.16b, v5.16b        // GHASH block 4k+2 - low
    pmull   v4.1q, v4.1d, v17.1d                // GHASH block 4k+1 - mid
    ins     v8.d[1], v8.d[0]                    // GHASH block 4k+2 - mid
    eor     v10.16b, v10.16b, v4.16b            // GHASH block 4k+1 - mid
    pmull2  v4.1q, v6.2d, HASH2.2d              // GHASH block 4k+2 - high
    mov     d6, v7.d[1]                         // GHASH block 4k+3 - mid
    pmull2  v8.1q, v8.2d, v16.2d                // GHASH block 4k+2 - mid
    eor     v9.16b, v9.16b, v4.16b              // GHASH block 4k+2 - high
    pmull   v4.1q, v7.1d, HASH1.1d              // GHASH block 4k+3 - low
    eor     v10.16b, v10.16b, v8.16b            // GHASH block 4k+2 - mid
    pmull2  v5.1q, v7.2d, HASH1.2d              // GHASH block 4k+3 - high
    eor     v6.8b, v6.8b, v7.8b                 // GHASH block 4k+3 - mid
    eor     v9.16b, v9.16b, v5.16b              // GHASH block 4k+3 - high
    pmull   v6.1q, v6.1d, v16.1d                // GHASH block 4k+3 - mid
    movi    v8.8b, #0xc2
    eor     HASH0.16b, HASH0.16b, v4.16b        // GHASH block 4k+3 - low
    shl     d8, d8, #56                         // mod_constant
    eor     v10.16b, v10.16b, v6.16b            // GHASH block 4k+3 - mid
    pmull   v7.1q, v9.1d, v8.1d                 // MODULO - top 64b align with mid
    eor     v6.16b, HASH0.16b, v9.16b           // MODULO - karatsuba tidy up
    ext     v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
    eor     v10.16b, v10.16b, v6.16b            // MODULO - karatsuba tidy up
    eor     v10.16b, v10.16b, v7.16b            // MODULO - fold into mid
    eor     v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
    pmull   v8.1q, v10.1d, v8.1d                // MODULO - mid 64b align with low
    eor     HASH0.16b, HASH0.16b, v8.16b        // MODULO - fold into low
    stp     x21, x22, [OUT00], #16              // AES[2] block - store result
    ext     v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
    stp     x23, x24, [OUT00], #16              // AES[3] block - store result
    eor     HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
.endm

/*
 * FIRST16_ROUND - apply nine AESE+AESMC rounds (KEY0..KEY8) to CTR0.
 */
.macro FIRST16_ROUND
    ROUND CTR0.16b, KEY0.16b
    ROUND CTR0.16b, KEY1.16b
    ROUND CTR0.16b, KEY2.16b
    ROUND CTR0.16b, KEY3.16b
    ROUND CTR0.16b, KEY4.16b
    ROUND CTR0.16b, KEY5.16b
    ROUND CTR0.16b, KEY6.16b
    ROUND CTR0.16b, KEY7.16b
    ROUND CTR0.16b, KEY8.16b
.endm

/*
 * DEC16_BLOCK - single-block decrypt step with GHASH update.
 *
 * In:      INPUT, CTR0 as left by the AES rounds, KEND0/KEND1, HASH0,
 *          HASH1_2 (v12), v16 (middle operand in lane 0), IV_H (x10),
 *          IV_L (x11), IV_W.
 * Out:     one block stored at OUT00 (OUT00 += 16); INPUT += 16;
 *          HASH0 = GHASH state updated with the input block;
 *          CTR0 rebuilt for the next block and copied to CTR1; IV_W += 1;
 *          COUNT -= 1 with the condition flags set by that subtraction.
 * Clobber: x6, x7, x9, v4-v10.
 */
.macro DEC16_BLOCK
    ld1     {OUT0.16b}, [INPUT], #16
    eor     CTR0.16b, CTR0.16b, OUT0.16b        // data->out[i] = data->in[i] ^ data->ctr[i];
    subs    COUNT, COUNT, #1                    // COUNT--
    mov     x6, CTR0.d[0]
    mov     x7, CTR0.d[1]
#ifdef HITLS_BIG_ENDIAN
    REV_2S x6, x7
#endif
    rev     w9, IV_W                            // CTR[0]
    eor     x6, x6, KEND0
    orr     x9, x11, x9, lsl #32                // CTR[0]
    eor     x7, x7, KEND1
    stp     x6, x7, [OUT00], #16                // OUT OK
    add     IV_W, IV_W, #1                      // CTR++
    fmov    d0, x10                             // CTR[0]
    fmov    CTR0.d[1], x9                       // CTR[0]--OK
    ext     v8.16b, HASH0.16b, HASH0.16b, #8    // prepare final partial tag
    movi    v11.8b, #0                          // v11 is HASH0: zero the low accumulator
    movi    v9.8b, #0                           // zero the high accumulator
    movi    v10.8b, #0                          // zero the mid accumulator
    rev64   v4.16b, OUT0.16b                    // GHASH final block
    mov     CTR1.16b, CTR0.16b
    eor     v4.16b, v4.16b, v8.16b              // feed in partial tag
    mov     d8, v4.d[1]                         // GHASH final block - mid
    pmull   v6.1q, v4.1d, HASH1_2.1d            // GHASH final block - low
    eor     v8.8b, v8.8b, v4.8b                 // GHASH final block - mid
    pmull2  v5.1q, v4.2d, HASH1_2.2d            // GHASH final block - high
    pmull   v8.1q, v8.1d, v16.1d                // GHASH final block - mid
    eor     HASH0.16b, HASH0.16b, v6.16b        // GHASH final block - low
    eor     v9.16b, v9.16b, v5.16b              // GHASH final block - high
    eor     v10.16b, v10.16b, v8.16b            // GHASH final block - mid
    movi    v8.8b, #0xc2
    eor     v7.16b, HASH0.16b, v9.16b           // MODULO - karatsuba tidy up
    shl     d8, d8, #56                         // mod_constant
    eor     v10.16b, v10.16b, v7.16b            // MODULO - karatsuba tidy up
    pmull   v5.1q, v9.1d, v8.1d                 // MODULO - top 64b align with mid
    ext     v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
    eor     v10.16b, v10.16b, v5.16b            // MODULO - fold into mid
    eor     v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
    pmull   v9.1q, v10.1d, v8.1d                // MODULO - mid 64b align with low
    ext     v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
    eor     HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
    eor     HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
.endm

/*
 * ENC16_BLOCK - single-block encrypt step with GHASH update.
 *
 * In:      x6, x7 = the 16 input bytes (this macro does not load them and
 *          does not touch INPUT or COUNT); CTR0 as left by the AES rounds;
 *          KEND0/KEND1; HASH0; HASH1_2 (v12); v16 (middle operand in lane
 *          0); IV_H (x10), IV_L (x11), IV_W.
 * Out:     one block stored at OUT00 (OUT00 += 16);
 *          HASH0 = GHASH state updated with the stored block;
 *          CTR0 rebuilt for the next block and copied to CTR1; IV_W += 1.
 * Clobber: x6, x7, x9, v4-v10.
 */
.macro ENC16_BLOCK
    eor     x6, x6, KEND0                       // fold in KEND low
    eor     x7, x7, KEND1                       // fold in KEND high
    rev     w9, IV_W                            // CTR[0]
    fmov    d4, x6                              // AES[0] - mov low
    fmov    OUT0.d[1], x7                       // AES[0] - mov high
    orr     x9, x11, x9, lsl #32                // CTR[0]
    add     IV_W, IV_W, #1                      // CTR++
    eor     OUT0.16b, OUT0.16b, CTR0.16b        // AES[0] - result
    st1     {OUT0.16b}, [OUT00], #16            // AES[0] - store result
    fmov    d0, x10                             // CTR[0]
    fmov    CTR0.d[1], x9                       // CTR[0]--OK
    ext     v8.16b, HASH0.16b, HASH0.16b, #8    // prepare final partial tag
    movi    v11.8b, #0                          // v11 is HASH0: zero the low accumulator
    movi    v9.8b, #0                           // zero the high accumulator
    movi    v10.8b, #0                          // zero the mid accumulator
    rev64   v4.16b, OUT0.16b                    // GHASH final block
    mov     CTR1.16b, CTR0.16b
    eor     v4.16b, v4.16b, v8.16b              // feed in partial tag
    mov     d8, v4.d[1]                         // GHASH final block - mid
    pmull   v6.1q, v4.1d, HASH1_2.1d            // GHASH final block - low
    eor     v8.8b, v8.8b, v4.8b                 // GHASH final block - mid
    pmull2  v5.1q, v4.2d, HASH1_2.2d            // GHASH final block - high
    pmull   v8.1q, v8.1d, v16.1d                // GHASH final block - mid
    eor     HASH0.16b, HASH0.16b, v6.16b        // GHASH final block - low
    eor     v9.16b, v9.16b, v5.16b              // GHASH final block - high
    eor     v10.16b, v10.16b, v8.16b            // GHASH final block - mid
    movi    v8.8b, #0xc2
    eor     v7.16b, HASH0.16b, v9.16b           // MODULO - karatsuba tidy up
    shl     d8, d8, #56                         // mod_constant
    eor     v10.16b, v10.16b, v7.16b            // MODULO - karatsuba tidy up
    pmull   v5.1q, v9.1d, v8.1d                 // MODULO - top 64b align with mid
    ext     v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
    eor     v10.16b, v10.16b, v5.16b            // MODULO - fold into mid
    eor     v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
    pmull   v9.1q, v10.1d, v8.1d                // MODULO - mid 64b align with low
    ext     v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
    eor     HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
    eor     HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
.endm

/*
 * BEFORE16_ROUND - set-up before the single-block rounds.
 *
 * - Swaps the 64-bit halves of HASH0, HASH1, HASH2, then byte-reverses each
 *   half of HASH0.
 * - Loads KEND0/KEND1 from [KEY00] (each rotated by 32 bits on big-endian
 *   builds).
 * - Loads IV_H/IV_L and CTR0 from [IVEC0] (IV_H/IV_L byte-reversed on
 *   big-endian builds).
 * - Extracts the 32-bit counter from the top of IV_L into IV_W,
 *   byte-reverses it and increments it; clears bits 63:32 of x11 (IV_L).
 * - Packs v8 = h2h|h1h and v16 = (h2l|h1l) ^ (h2h|h1h).
 *
 * Clobber: v8, v16.
 * NOTE(review): the load comment calls this key-10, which matches KEY00
 * after LOAD_KEY has advanced it by ten keys - confirm for other key sizes.
 */
.macro BEFORE16_ROUND
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // xi
    ext     HASH1.16b, HASH1.16b, HASH1.16b, #8 // h^1
    ext     HASH2.16b, HASH2.16b, HASH2.16b, #8 // h^2
    ldp     KEND0, KEND1, [KEY00]               // load key-10
#ifdef HITLS_BIG_ENDIAN
    ror     KEND0, KEND0, #32
    ror     KEND1, KEND1, #32
#endif
    ldp     IV_H, IV_L, [IVEC0]                 // load IV
#ifdef HITLS_BIG_ENDIAN
    rev     IV_H, IV_H
    rev     IV_L, IV_L
#endif
    lsr     IV_C, IV_L, #32                     // counter word = top 32 bits of IV_L
    ld1     {CTR0.16b}, [IVEC0]                 // CTR[0]
    rev     IV_W, IV_W                          // rev_ctr32
    trn1    v8.2d, HASH1.2d, HASH2.2d           // h2h | h1h
    trn2    v16.2d, HASH1.2d, HASH2.2d          // h2l | h1l
    orr     w11, w11, w11                       // W write zeroes bits 63:32 of x11
    rev64   HASH0.16b, HASH0.16b                // byte-reverse each 64-bit half of xi
    add     IV_W, IV_W, #1                      // ctr++
    eor     v16.16b, v16.16b, v8.16b            // h2k | h1k
.endm

#endif