/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)

/*
 * GCM_ENC256_LOOP
 *
 * One pass of a four-block (64-byte) encrypt + GHASH loop body. The macro takes no
 * arguments; it works on register aliases (CTRn, KEYn, KENDn, HASHn, OUTn, INPUT, OUT00,
 * IV_W, COUNT) and on the ROUND macro, none of which are defined in this part of the file.
 * NOTE(review): ROUND is presumably one aese + aesmc step, and CTR0..CTR3 / OUT0..OUT3
 * appear to alias v0..v3 / v4..v7 (the code mixes both spellings) - confirm at the definitions.
 *
 * Cipher side (as visible here):
 *   - CTR0..CTR3 each pass through ROUND with KEY0..KEY12 and a final aese with KEY13.
 *   - The last round key is applied separately: each 64-bit half of the data loaded from
 *     [INPUT] is XORed with KEND0 (low half) / KEND1 (high half) in general registers,
 *     moved into OUTn, and OUTn is then XORed with CTRn.
 *   - Four 16-byte blocks are read from [INPUT] and four are written to [OUT00]; both
 *     pointers are post-incremented by 16 per block.
 *   - Each CTRn is rebuilt for the following pass: low 64 bits from x10, high 64 bits from
 *     x9 = x11 | (byte-reversed IV_W << 32). IV_W is incremented four times per pass.
 *     On entry x9 must already hold the high half for CTR3; on exit it holds the value for
 *     the next CTR3.
 *
 * GHASH side (as visible here):
 *   - The four blocks that are in v4..v7 on entry are byte-reversed per 64-bit lane (rev64),
 *     HASH0 (halves swapped by ext) is XORed into the first one, and the blocks are
 *     multiplied by HASH4, HASH3, HASH2, HASH1 respectively with pmull/pmull2.
 *   - The Karatsuba middle terms use v17.d[1], v17.d[0], v16.d[1], v16.d[0] for blocks
 *     4k..4k+3. NOTE(review): presumably these hold (hi ^ lo) of the matching hash key
 *     power - confirm against the setup code.
 *   - The product is reduced with the constant 0xc2 << 56 built in d8 and left in HASH0.
 *   - On exit v4..v7 hold the four blocks just stored; they are hashed by the next pass
 *     (or by whatever code follows the loop).
 *
 * Flags: subs on COUNT sets NZCV for the code that follows the macro.
 *
 * Registers written: x6, x7, x9, x19-x24, IV_W, COUNT, INPUT, OUT00, v0-v10, HASH0.
 * x19-x24 and the low halves of v8-v10 are callee-saved under AAPCS64; the enclosing routine
 * has to preserve them (that code is not visible here).
 */
.macro GCM_ENC256_LOOP
    ROUND   CTR0.16b, KEY0.16b
    rev64   v4.16b, v4.16b                          // GHASH block 4k (only t0 is free)
    ROUND   CTR1.16b, KEY0.16b
    fmov    d3, x10                                 // CTR[3] - low 64 bits (also clears the high half)
    ROUND   CTR2.16b, KEY0.16b

    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8     // PRE 0 - swap the two 64-bit halves of HASH0
    ROUND   CTR0.16b, KEY1.16b
    fmov    CTR3.d[1], x9                           // CTR[3] - OK (high 64 bits prepared by the previous pass)

    ROUND   CTR1.16b, KEY1.16b
    ldp     x6, x7, [INPUT], #16                    // AES[0] - load plaintext
    ROUND   CTR2.16b, KEY1.16b
    ldp     x19, x20, [INPUT], #16                  // AES[1] - load plaintext
    ROUND   CTR0.16b, KEY2.16b
#ifdef HITLS_BIG_ENDIAN
    // ldp is byte-order dependent; swap so the halves match the little-endian lane layout.
    rev     x6, x6
    rev     x7, x7
    rev     x19, x19
    rev     x20, x20
#endif
    eor     v4.16b, v4.16b, HASH0.16b               // PRE 1 - fold running hash into block 4k
    ROUND   CTR1.16b, KEY2.16b
    ROUND   CTR3.16b, KEY0.16b
    eor     x6, x6, KEND0                           // AES[0] - round 14 low

    ROUND   CTR0.16b, KEY3.16b
    mov     d10, v17.d[1]                           // GHASH block 4k - mid
    pmull2  v9.1q, v4.2d, HASH4.2d                  // GHASH block 4k - high
    eor     x7, x7, KEND1                           // AES[0] - round 14 high
    mov     d8, v4.d[1]                             // GHASH block 4k - mid
    ROUND   CTR3.16b, KEY1.16b
    rev64   v5.16b, v5.16b                          // GHASH block 4k+1 (t0 and t1 free)
    ROUND   CTR0.16b, KEY4.16b
    pmull   HASH0.1q, v4.1d, HASH4.1d               // GHASH block 4k - low
    eor     v8.8b, v8.8b, v4.8b                     // GHASH block 4k - mid
    ROUND   CTR2.16b, KEY2.16b
    ROUND   CTR0.16b, KEY5.16b
    rev64   v7.16b, v7.16b                          // GHASH block 4k+3 (t0, t1, t2 and t3 free)

    pmull2  v4.1q, v5.2d, HASH3.2d                  // GHASH block 4k+1 - high
    pmull   v10.1q, v8.1d, v10.1d                   // GHASH block 4k - mid
    rev64   v6.16b, v6.16b                          // GHASH block 4k+2 (t0, t1, and t2 free)
    pmull   v8.1q, v5.1d, HASH3.1d                  // GHASH block 4k+1 - low
    eor     v9.16b, v9.16b, v4.16b                  // GHASH block 4k+1 - high
    mov     d4, v5.d[1]                             // GHASH block 4k+1 - mid
    ROUND   CTR1.16b, KEY3.16b
    ROUND   CTR3.16b, KEY2.16b
    eor     HASH0.16b, HASH0.16b, v8.16b            // GHASH block 4k+1 - low
    ROUND   CTR2.16b, KEY3.16b
    ROUND   CTR1.16b, KEY4.16b
    mov     d8, v6.d[1]                             // GHASH block 4k+2 - mid
    ROUND   CTR3.16b, KEY3.16b
    eor     v4.8b, v4.8b, v5.8b                     // GHASH block 4k+1 - mid
    ROUND   CTR2.16b, KEY4.16b
    ROUND   CTR0.16b, KEY6.16b
    eor     v8.8b, v8.8b, v6.8b                     // GHASH block 4k+2 - mid
    ROUND   CTR3.16b, KEY4.16b
    pmull   v4.1q, v4.1d, v17.1d                    // GHASH block 4k+1 - mid
    ROUND   CTR0.16b, KEY7.16b
    ROUND   CTR3.16b, KEY5.16b
    ins     v8.d[1], v8.d[0]                        // GHASH block 4k+2 - mid (copy to high lane for pmull2)
    ROUND   CTR1.16b, KEY5.16b
    ROUND   CTR0.16b, KEY8.16b
    ROUND   CTR2.16b, KEY5.16b
    ROUND   CTR1.16b, KEY6.16b
    eor     v10.16b, v10.16b, v4.16b                // GHASH block 4k+1 - mid
    pmull2  v4.1q, v6.2d, HASH2.2d                  // GHASH block 4k+2 - high
    pmull   v5.1q, v6.1d, HASH2.1d                  // GHASH block 4k+2 - low
    ROUND   CTR1.16b, KEY7.16b
    pmull   v6.1q, v7.1d, HASH1.1d                  // GHASH block 4k+3 - low
    eor     v9.16b, v9.16b, v4.16b                  // GHASH block 4k+2 - high
    ROUND   CTR3.16b, KEY6.16b

    ldp     x21, x22, [INPUT], #16                  // AES[2] - load plaintext
    ROUND   CTR1.16b, KEY8.16b
    mov     d4, v7.d[1]                             // GHASH block 4k+3 - mid
#ifdef HITLS_BIG_ENDIAN
    rev     x21, x21
    rev     x22, x22
#endif
    ROUND   CTR2.16b, KEY6.16b
    eor     HASH0.16b, HASH0.16b, v5.16b            // GHASH block 4k+2 - low
    pmull2  v8.1q, v8.2d, v16.2d                    // GHASH block 4k+2 - mid
    pmull2  v5.1q, v7.2d, HASH1.2d                  // GHASH block 4k+3 - high
    eor     v4.8b, v4.8b, v7.8b                     // GHASH block 4k+3 - mid
    ROUND   CTR2.16b, KEY7.16b
    eor     x19, x19, KEND0                         // AES[1] - round 14 low
    ROUND   CTR1.16b, KEY9.16b
    eor     v10.16b, v10.16b, v8.16b                // GHASH block 4k+2 - mid
    ROUND   CTR3.16b, KEY7.16b
    eor     x21, x21, KEND0                         // AES[2] - round 14 low
    ROUND   CTR0.16b, KEY9.16b
    movi    v8.8b, #0xc2                            // mod_constant - 0xc2 in every byte of d8
    pmull   v4.1q, v4.1d, v16.1d                    // GHASH block 4k+3 - mid
    eor     v9.16b, v9.16b, v5.16b                  // GHASH block 4k+3 - high
    fmov    d5, x19                                 // AES[1] - mov low

    ROUND   CTR2.16b, KEY8.16b
    ldp     x23, x24, [INPUT], #16                  // AES[3] - load plaintext
    ROUND   CTR0.16b, KEY10.16b
    shl     d8, d8, #56                             // mod_constant - d8 = 0xc2 << 56
#ifdef HITLS_BIG_ENDIAN
    rev     x23, x23
    rev     x24, x24
#endif
    ROUND   CTR3.16b, KEY8.16b
    eor     HASH0.16b, HASH0.16b, v6.16b            // GHASH block 4k+3 - low
    ROUND   CTR2.16b, KEY9.16b
    ROUND   CTR1.16b, KEY10.16b
    eor     v10.16b, v10.16b, v4.16b                // GHASH block 4k+3 - mid
    ROUND   CTR3.16b, KEY9.16b
    add     IV_W, IV_W, #1                          // CTR++
    ROUND   CTR0.16b, KEY11.16b
    eor     v4.16b, HASH0.16b, v9.16b               // MODULO - karatsuba tidy up
    ROUND   CTR1.16b, KEY11.16b

    pmull   v7.1q, v9.1d, v8.1d                     // MODULO - top 64b align with mid
    rev     w9, IV_W                                // CTR[0] - byte-reversed counter for the next pass
    ext     v9.16b, v9.16b, v9.16b, #8              // MODULO - other top alignment
    ROUND   CTR2.16b, KEY10.16b
    eor     x23, x23, KEND0                         // AES[3] - round 14 low
    ROUND   CTR1.16b, KEY12.16b
    eor     v10.16b, v10.16b, v4.16b                // MODULO - karatsuba tidy up
    ROUND   CTR3.16b, KEY10.16b
    eor     x20, x20, KEND1                         // AES[1] - round 14 high

    fmov    d4, x6                                  // AES[0] - mov low
    orr     x9, x11, x9, lsl #32                    // CTR[0] - high 64 bits = x11 | (counter << 32)
    eor     v7.16b, v9.16b, v7.16b                  // MODULO - fold into mid
    ROUND   CTR0.16b, KEY12.16b
    eor     x22, x22, KEND1                         // AES[2] - round 14 high
    ROUND   CTR2.16b, KEY11.16b
    eor     x24, x24, KEND1                         // AES[3] - round 14 high

    ROUND   CTR3.16b, KEY11.16b
    add     IV_W, IV_W, #1                          // CTR++
    aese    CTR0.16b, KEY13.16b                     // AES[0] - round 13
    fmov    OUT0.d[1], x7                           // AES[0] - mov high
    eor     v10.16b, v10.16b, v7.16b                // MODULO - fold into mid
    ROUND   CTR2.16b, KEY12.16b
    fmov    d7, x23                                 // AES[3] - mov low
    aese    CTR1.16b, KEY13.16b                     // AES[1] - round 13
    fmov    OUT1.d[1], x20                          // AES[1] - mov high
    fmov    d6, x21                                 // AES[2] - mov low

    subs    COUNT, COUNT, #1                        // COUNT-- (sets flags for the code after the macro)
    fmov    OUT2.d[1], x22                          // AES[2] - mov high
    pmull   v9.1q, v10.1d, v8.1d                    // MODULO - mid 64b align with low
    eor     OUT0.16b, OUT0.16b, CTR0.16b            // AES[0] - result
    fmov    d0, x10                                 // CTR[0]
    fmov    CTR0.d[1], x9                           // CTR[0]--OK
    rev     w9, IV_W                                // CTR[1]
    add     IV_W, IV_W, #1                          // CTR++

    eor     OUT1.16b, OUT1.16b, CTR1.16b            // AES[1] - result
    fmov    d1, x10                                 // CTR[1]
    orr     x9, x11, x9, lsl #32                    // CTR[1]
    ROUND   CTR3.16b, KEY12.16b
    fmov    v1.d[1], x9                             // CTR[1]--OK
    aese    CTR2.16b, KEY13.16b                     // AES[2] - round 13
    rev     w9, IV_W                                // CTR[2]

    st1     {OUT0.16b}, [OUT00], #16                // AES[0] - store result
    orr     x9, x11, x9, lsl #32                    // CTR[2]
    eor     HASH0.16b, HASH0.16b, v9.16b            // MODULO - fold into low
    fmov    OUT3.d[1], x24                          // AES[3] - mov high
    ext     v10.16b, v10.16b, v10.16b, #8           // MODULO - other mid alignment
    st1     {OUT1.16b}, [OUT00], #16                // AES[1] - store result
    add     IV_W, IV_W, #1                          // CTR++
    aese    CTR3.16b, KEY13.16b                     // AES[3] - round 13

    eor     OUT2.16b, OUT2.16b, CTR2.16b            // AES[2] - result
    fmov    d2, x10                                 // CTR[2]
    st1     {OUT2.16b}, [OUT00], #16                // AES[2] - store result
    fmov    v2.d[1], x9                             // CTR[2]--OK
    rev     w9, IV_W                                // CTR[3] - consumed at the top of the next pass

    eor     OUT3.16b, OUT3.16b, CTR3.16b            // AES[3] - result
    eor     HASH0.16b, HASH0.16b, v10.16b           // MODULO - fold into low
    orr     x9, x11, x9, lsl #32                    // CTR[3] - consumed at the top of the next pass
    st1     {OUT3.16b}, [OUT00], #16                // AES[3] - store result
.endm

/*
 * GCM_DEC256_LOOP
 *
 * One pass of a four-block (64-byte) decrypt + GHASH loop body. The macro takes no
 * arguments; it works on register aliases (CTRn, KEYn, KENDn, HASHn, OUTn, INPUT, OUT00,
 * IV_W, COUNT) and on the ROUND macro, none of which are defined in this part of the file.
 * NOTE(review): ROUND is presumably one aese + aesmc step, and CTR0..CTR3 / OUT0..OUT3
 * appear to alias v0..v3 / v4..v7 (the code mixes both spellings) - confirm at the definitions.
 *
 * The macro is software-pipelined across passes; what the visible instructions require
 * and leave behind:
 *   On entry
 *     - CTR2 holds block 2 already XORed with its round-13 output; CTR3 holds the round-13
 *       output for block 3 and OUT3 the matching ciphertext. Both blocks are finished here
 *       (XOR with KEND0 / KEND1 in general registers) and are the first two stores.
 *     - v4 and v5 are already byte-reversed (rev64) for GHASH; v6 and v7 are reversed here.
 *     - x9 holds the high 64 bits for the next CTR2; IV_W is the counter for the next CTR3.
 *   During the pass
 *     - Four new ciphertext blocks are loaded from [INPUT] into OUT0..OUT3 (post-increment 16).
 *     - CTR0..CTR3 each pass through ROUND with KEY0..KEY12 and a final aese with KEY13.
 *     - New blocks 0 and 1 are completed and stored; four 16-byte stores to [OUT00] in total.
 *     - v4..v7 from entry are multiplied by HASH4, HASH3, HASH2, HASH1 (Karatsuba middle
 *       terms from v17.d[1], v17.d[0], v16.d[1], v16.d[0]), reduced with 0xc2 << 56 and
 *       left in HASH0. NOTE(review): v16/v17 presumably hold (hi ^ lo) of the hash key
 *       powers - confirm against the setup code.
 *   On exit
 *     - New blocks 2 and 3 are pending in CTR2 / CTR3+OUT3 exactly as described under
 *       "On entry"; v4 and v5 (new blocks 0 and 1) have been byte-reversed for the next pass.
 *     - x9 holds the high 64 bits for the next CTR2; IV_W has been incremented four times.
 *
 * Flags: subs on COUNT sets NZCV for the code that follows the macro.
 *
 * Registers written: x6, x7, x9, x19-x24, IV_W, COUNT, INPUT, OUT00, v0-v10, HASH0.
 * x19-x24 and the low halves of v8-v10 are callee-saved under AAPCS64; the enclosing routine
 * has to preserve them (that code is not visible here).
 */
.macro GCM_DEC256_LOOP
    mov     x21, CTR2.d[0]                          // AES[2] block - mov low (pending from the previous pass)
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8     // PRE 0 - swap the two 64-bit halves of HASH0
    eor     CTR3.16b, OUT3.16b, CTR3.16b            // AES[3] block - result (pending from the previous pass)
    ROUND   CTR0.16b, KEY0.16b
    mov     x22, CTR2.d[1]                          // AES[2] block - mov high

    ROUND   CTR1.16b, KEY0.16b
    fmov    d2, x10                                 // CTR[2]
    fmov    v2.d[1], x9                             // CTR[2]
    eor     v4.16b, v4.16b, HASH0.16b               // PRE 1 - fold running hash into block 4k
#ifdef HITLS_BIG_ENDIAN
    rev     x21, x21
    rev     x22, x22
#endif
    rev     w9, IV_W                                // CTR[3]
    ROUND   CTR0.16b, KEY1.16b
    mov     x24, CTR3.d[1]                          // AES[3] block - mov high
    ROUND   CTR1.16b, KEY1.16b
    mov     x23, CTR3.d[0]                          // AES[3] block - mov low

    pmull2  v9.1q, v4.2d, HASH4.2d                  // GHASH block 4k - high
    mov     d8, v4.d[1]                             // GHASH block 4k - mid
    fmov    d3, x10                                 // CTR[3]
#ifdef HITLS_BIG_ENDIAN
    rev     x23, x23
    rev     x24, x24
#endif
    ROUND   CTR0.16b, KEY2.16b
    orr     x9, x11, x9, lsl #32                    // CTR[3] - high 64 bits = x11 | (counter << 32)
    ROUND   CTR2.16b, KEY0.16b
    fmov    v3.d[1], x9                             // CTR[3]
    ROUND   CTR1.16b, KEY2.16b
    eor     v8.8b, v8.8b, v4.8b                     // GHASH block 4k - mid
    ROUND   CTR0.16b, KEY3.16b
    eor     x22, x22, KEND1                         // AES[2] - round 14 high
    ROUND   CTR2.16b, KEY1.16b
    mov     d10, v17.d[1]                           // GHASH block 4k - mid
    ROUND   CTR1.16b, KEY3.16b
    rev64   v6.16b, v6.16b                          // GHASH block 4k+2
    ROUND   CTR3.16b, KEY0.16b
    eor     x21, x21, KEND0                         // AES[2] - round 14 low
    ROUND   CTR2.16b, KEY2.16b
    stp     x21, x22, [OUT00], #16                  // AES[2] - store result
    pmull   HASH0.1q, v4.1d, HASH4.1d               // GHASH block 4k - low
    pmull2  v4.1q, v5.2d, HASH3.2d                  // GHASH block 4k+1 - high
    ROUND   CTR2.16b, KEY3.16b
    rev64   v7.16b, v7.16b                          // GHASH block 4k+3

    pmull   v10.1q, v8.1d, v10.1d                   // GHASH block 4k - mid
    eor     x23, x23, KEND0                         // AES[3] - round 14 low
    pmull   v8.1q, v5.1d, HASH3.1d                  // GHASH block 4k+1 - low
    eor     x24, x24, KEND1                         // AES[3] - round 14 high
    eor     v9.16b, v9.16b, v4.16b                  // GHASH block 4k+1 - high
    ROUND   CTR2.16b, KEY4.16b
    ROUND   CTR3.16b, KEY1.16b
    mov     d4, v5.d[1]                             // GHASH block 4k+1 - mid
    ROUND   CTR0.16b, KEY4.16b
    eor     HASH0.16b, HASH0.16b, v8.16b            // GHASH block 4k+1 - low
    ROUND   CTR2.16b, KEY5.16b
    add     IV_W, IV_W, #1                          // CTR++
    ROUND   CTR3.16b, KEY2.16b
    mov     d8, v6.d[1]                             // GHASH block 4k+2 - mid
    ROUND   CTR1.16b, KEY4.16b
    eor     v4.8b, v4.8b, v5.8b                     // GHASH block 4k+1 - mid

    pmull   v5.1q, v6.1d, HASH2.1d                  // GHASH block 4k+2 - low
    ROUND   CTR3.16b, KEY3.16b
    eor     v8.8b, v8.8b, v6.8b                     // GHASH block 4k+2 - mid
    ROUND   CTR1.16b, KEY5.16b
    ROUND   CTR0.16b, KEY5.16b
    eor     HASH0.16b, HASH0.16b, v5.16b            // GHASH block 4k+2 - low

    pmull   v4.1q, v4.1d, v17.1d                    // GHASH block 4k+1 - mid
    rev     w9, IV_W                                // CTR[0] - byte-reversed counter for the next pass
    ROUND   CTR1.16b, KEY6.16b
    ins     v8.d[1], v8.d[0]                        // GHASH block 4k+2 - mid (copy to high lane for pmull2)
    ROUND   CTR0.16b, KEY6.16b
    add     IV_W, IV_W, #1                          // CTR++
    ROUND   CTR3.16b, KEY4.16b
    ROUND   CTR1.16b, KEY7.16b
    eor     v10.16b, v10.16b, v4.16b                // GHASH block 4k+1 - mid
    ROUND   CTR0.16b, KEY7.16b

    pmull2  v4.1q, v6.2d, HASH2.2d                  // GHASH block 4k+2 - high
    mov     d6, v7.d[1]                             // GHASH block 4k+3 - mid
    ROUND   CTR3.16b, KEY5.16b

    pmull2  v8.1q, v8.2d, v16.2d                    // GHASH block 4k+2 - mid
    ROUND   CTR0.16b, KEY8.16b
    eor     v9.16b, v9.16b, v4.16b                  // GHASH block 4k+2 - high
    ROUND   CTR3.16b, KEY6.16b

    pmull   v4.1q, v7.1d, HASH1.1d                  // GHASH block 4k+3 - low
    orr     x9, x11, x9, lsl #32                    // CTR[0]
    eor     v10.16b, v10.16b, v8.16b                // GHASH block 4k+2 - mid

    pmull2  v5.1q, v7.2d, HASH1.2d                  // GHASH block 4k+3 - high
    ROUND   CTR0.16b, KEY9.16b
    eor     v6.8b, v6.8b, v7.8b                     // GHASH block 4k+3 - mid
    ROUND   CTR1.16b, KEY8.16b
    ROUND   CTR2.16b, KEY6.16b
    eor     v9.16b, v9.16b, v5.16b                  // GHASH block 4k+3 - high
    ROUND   CTR0.16b, KEY10.16b
    pmull   v6.1q, v6.1d, v16.1d                    // GHASH block 4k+3 - mid
    movi    v8.8b, #0xc2                            // mod_constant - 0xc2 in every byte of d8
    ROUND   CTR2.16b, KEY7.16b
    eor     HASH0.16b, HASH0.16b, v4.16b            // GHASH block 4k+3 - low
    ROUND   CTR0.16b, KEY11.16b
    ROUND   CTR3.16b, KEY7.16b
    shl     d8, d8, #56                             // mod_constant - d8 = 0xc2 << 56
    ROUND   CTR2.16b, KEY8.16b
    eor     v10.16b, v10.16b, v6.16b                // GHASH block 4k+3 - mid
    ROUND   CTR0.16b, KEY12.16b
    pmull   v7.1q, v9.1d, v8.1d                     // MODULO - top 64b align with mid
    eor     v6.16b, HASH0.16b, v9.16b               // MODULO - karatsuba tidy up
    ROUND   CTR1.16b, KEY9.16b

    ld1     {OUT0.16b}, [INPUT], #16                // AES load[0] ciphertext
    aese    CTR0.16b, KEY13.16b                     // AES[0] - round 13
    ext     v9.16b, v9.16b, v9.16b, #8              // MODULO - other top alignment
    ROUND   CTR1.16b, KEY10.16b
    eor     v10.16b, v10.16b, v6.16b                // MODULO - karatsuba tidy up
    ROUND   CTR2.16b, KEY9.16b

    ld1     {OUT1.16b}, [INPUT], #16                // AES load[1] ciphertext
    ROUND   CTR3.16b, KEY8.16b
    eor     CTR0.16b, OUT0.16b, CTR0.16b            // AES[0] block - result
    ROUND   CTR1.16b, KEY11.16b

    stp     x23, x24, [OUT00], #16                  // AES[3] block - store result
    ROUND   CTR2.16b, KEY10.16b
    eor     v10.16b, v10.16b, v7.16b                // MODULO - fold into mid
    ROUND   CTR3.16b, KEY9.16b
    ld1     {OUT2.16b}, [INPUT], #16                // AES load[2] ciphertext

    ROUND   CTR1.16b, KEY12.16b
    ld1     {OUT3.16b}, [INPUT], #16                // AES load[3] ciphertext
    ROUND   CTR2.16b, KEY11.16b
    mov     x7, CTR0.d[1]                           // AES[0] block - mov high
    ROUND   CTR3.16b, KEY10.16b
    eor     v10.16b, v10.16b, v9.16b                // MODULO - fold into mid
    aese    CTR1.16b, KEY13.16b                     // AES[1] - round 13
    mov     x6, CTR0.d[0]                           // AES[0] block - mov low
    ROUND   CTR2.16b, KEY12.16b
    fmov    d0, x10                                 // CTR[0]
    ROUND   CTR3.16b, KEY11.16b
#ifdef HITLS_BIG_ENDIAN
    rev     x6, x6
    rev     x7, x7
#endif
    fmov    CTR0.d[1], x9                           // CTR[0]--OK
    pmull   v8.1q, v10.1d, v8.1d                    // MODULO - mid 64b align with low
    eor     CTR1.16b, OUT1.16b, CTR1.16b            // AES[1] block - result
    rev     w9, IV_W                                // CTR[1]
    aese    CTR2.16b, KEY13.16b                     // AES[2] - round 13
    orr     x9, x11, x9, lsl #32                    // CTR[1]

    subs    COUNT, COUNT, #1                        // COUNT-- (sets flags for the code after the macro)
    add     IV_W, IV_W, #1                          // CTR++
    eor     x6, x6, KEND0                           // AES[0] block - round 14 low
    eor     x7, x7, KEND1                           // AES[0] block - round 14 high

    mov     x20, v1.d[1]                            // AES[1] block - mov high
    eor     CTR2.16b, OUT2.16b, CTR2.16b            // AES[2] block - result (finished by the next pass)
    eor     HASH0.16b, HASH0.16b, v8.16b            // MODULO - fold into low
    ROUND   CTR3.16b, KEY12.16b
    mov     x19, CTR1.d[0]                          // AES[1] block - mov low
    fmov    d1, x10                                 // CTR[1]
    ext     v10.16b, v10.16b, v10.16b, #8           // MODULO - other mid alignment
#ifdef HITLS_BIG_ENDIAN
    rev     x20, x20
    rev     x19, x19
#endif
    fmov    CTR1.d[1], x9                           // CTR[1]--OK
    rev     w9, IV_W                                // CTR[2] - consumed at the top of the next pass
    add     IV_W, IV_W, #1                          // CTR++

    aese    CTR3.16b, KEY13.16b                     // AES[3] - round 13 (XOR with OUT3 happens in the next pass)
    orr     x9, x11, x9, lsl #32                    // CTR[2] - consumed at the top of the next pass
    rev64   v5.16b, v5.16b                          // GHASH - byte-reverse new block 1 for the next pass
    eor     x20, x20, KEND1                         // AES[1] block - round 14 high
    stp     x6, x7, [OUT00], #16                    // AES[0] block - store result
    eor     x19, x19, KEND0                         // AES[1] block - round 14 low
    stp     x19, x20, [OUT00], #16                  // AES[1] block - store result
    rev64   OUT0.16b, OUT0.16b                      // GHASH - byte-reverse new block 0 for the next pass
    eor     HASH0.16b, HASH0.16b, v10.16b           // MODULO - fold into low
.endm

#endif