/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)

/*
 * Syntax: GNU as for AArch64, run through the C preprocessor.
 *
 * Names not defined in this file (ROUND, CTR0..CTR3, OUT0..OUT3, HASH0..HASH4,
 * KEY0..KEY11, KEND0, KEND1, INPUT, OUT00, IV_W, COUNT) come from the including
 * source. The bodies pair `fmov dN, xM` with `<alias>.d[1]` writes, which only
 * works if CTR0..CTR3 are v0..v3 and OUT0..OUT3 are v4..v7.
 * NOTE(review): confirm that mapping against the alias definitions.
 * NOTE(review): ROUND is presumably one `aese` + `aesmc` pair - confirm.
 *
 * Both macros write x19-x24, which are callee-saved under AAPCS64; saving and
 * restoring them is left to the enclosing function (not visible here).
 *
 * The instruction order is a hand-tuned interleave of AES and GHASH work.
 * Do not reorder lines without re-checking every register lifetime.
 */

/*
 * GCM_ENC192_LOOP
 *
 * One pass of the four-block encryption main loop. The AES rounds for the
 * counter blocks CTR0..CTR3 are interleaved with the GHASH of the four blocks
 * held in OUT0..OUT3 on entry (presumably the ciphertext produced by the
 * previous pass - confirm against the caller).
 *
 * Visible effects of one expansion:
 *   - loads 4 x 16 bytes from [INPUT] and stores 4 x 16 bytes to [OUT00],
 *     both post-incremented;
 *   - runs ROUND with KEY0..KEY10 and a bare `aese` with KEY11 on each of
 *     CTR0..CTR3. The loaded input words are XORed with KEND0/KEND1 in general
 *     registers first, so the final vector `eor` applies the input and that
 *     key material in one step;
 *   - increments IV_W four times and rebuilds each counter block as
 *     low half = x10, high half = x11 | (rev(IV_W) << 32);
 *   - leaves the reduced GHASH accumulator in HASH0;
 *   - executes `subs COUNT, COUNT, #1`. No later instruction in the macro
 *     writes the condition flags, so they are still valid after the macro.
 *
 * Entry: x9 must already hold the high half of the CTR3 block; it is consumed
 *        by the first `fmov CTR3.d[1], x9` before being recomputed.
 * Exit:  x9 holds the high half of the next CTR3 block; OUT0..OUT3 hold the
 *        four blocks just stored.
 *
 * Writes: x6, x7, x9, x19-x24, v0-v10 (some through aliases), v30, v31,
 *         HASH0, IV_W, COUNT, INPUT, OUT00, NZCV.
 */
.macro GCM_ENC192_LOOP
    ROUND   CTR2.16b, KEY0.16b
    rev64   OUT1.16b, OUT1.16b                  // GHASH block 4k+1 (t0 and t1 free)
    ROUND   CTR1.16b, KEY0.16b
    ldp     x6, x7, [INPUT], #16                // AES[0] - load plaintext
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0

    fmov    d3, x10                             // CTR[3]
    rev64   OUT0.16b, OUT0.16b                  // GHASH block 4k (only t0 is free)
    ROUND   CTR2.16b, KEY1.16b
    fmov    CTR3.d[1], x9                       // CTR[3]--OK (x9 from the previous pass)
#ifdef HITLS_BIG_ENDIAN
    rev     x6, x6
    rev     x7, x7
#endif
    pmull2  v30.1q, v5.2d, HASH3.2d             // GHASH block 4k+1 - high
    rev64   OUT3.16b, OUT3.16b                  // GHASH block 4k+3 (t0, t1, t2 and t3 free)
    ldp     x19, x20, [INPUT], #16              // AES[1] - load plaintext
    ROUND   CTR0.16b, KEY0.16b
    ldp     x21, x22, [INPUT], #16              // AES[2] - load plaintext
    pmull   v31.1q, v5.1d, HASH3.1d             // GHASH block 4k+1 - low
    eor     v4.16b, v4.16b, HASH0.16b           // PRE 1
#ifdef HITLS_BIG_ENDIAN
    rev     x19, x19
    rev     x20, x20
    rev     x21, x21
    rev     x22, x22
#endif
    ROUND   CTR1.16b, KEY1.16b
    ROUND   CTR0.16b, KEY1.16b
    rev64   OUT2.16b, OUT2.16b                  // GHASH block 4k+2 (t0, t1, and t2 free)
    ROUND   CTR3.16b, KEY0.16b
    eor     x7, x7, KEND1                       // AES[0] - round 12 high
    pmull   HASH0.1q, v4.1d, HASH4.1d           // GHASH block 4k - low
    mov     d8, v4.d[1]                         // GHASH block 4k - mid
    ROUND   CTR0.16b, KEY2.16b
    ROUND   CTR3.16b, KEY1.16b

    eor     x6, x6, KEND0                       // AES[0] - round 12 low
    eor     v8.8b, v8.8b, v4.8b                 // GHASH block 4k - mid
    eor     HASH0.16b, HASH0.16b, v31.16b       // GHASH block 4k+1 - low
    ROUND   CTR0.16b, KEY3.16b
    eor     x19, x19, KEND0                     // AES[1] - round 12 low
    ROUND   CTR1.16b, KEY2.16b
    mov     d31, v6.d[1]                        // GHASH block 4k+2 - mid
    pmull2  v9.1q, v4.2d, HASH4.2d              // GHASH block 4k - high
    mov     d4, v5.d[1]                         // GHASH block 4k+1 - mid

    ROUND   CTR2.16b, KEY2.16b
    ROUND   CTR1.16b, KEY3.16b
    mov     d10, v17.d[1]                       // GHASH block 4k - mid
    eor     v9.16b, v9.16b, v30.16b             // GHASH block 4k+1 - high
    ROUND   CTR3.16b, KEY2.16b
    eor     v31.8b, v31.8b, v6.8b               // GHASH block 4k+2 - mid
    pmull2  v30.1q, v6.2d, HASH2.2d             // GHASH block 4k+2 - high
    ROUND   CTR0.16b, KEY4.16b
    eor     v4.8b, v4.8b, v5.8b                 // GHASH block 4k+1 - mid
    ROUND   CTR3.16b, KEY3.16b
    pmull2  v5.1q, v7.2d, HASH1.2d              // GHASH block 4k+3 - high
    eor     x20, x20, KEND1                     // AES[1] - round 12 high

    ins     v31.d[1], v31.d[0]                  // GHASH block 4k+2 - mid
    ROUND   CTR0.16b, KEY5.16b
    add     IV_W, IV_W, #1                      // CTR++
    ROUND   CTR3.16b, KEY4.16b
    eor     v9.16b, v9.16b, v30.16b             // GHASH block 4k+2 - high
    pmull   v4.1q, v4.1d, v17.1d                // GHASH block 4k+1 - mid
    eor     x22, x22, KEND1                     // AES[2] - round 12 high

    pmull2  v31.1q, v31.2d, v16.2d              // GHASH block 4k+2 - mid
    eor     x21, x21, KEND0                     // AES[2] - round 12 low
    mov     d30, v7.d[1]                        // GHASH block 4k+3 - mid
    pmull   v10.1q, v8.1d, v10.1d               // GHASH block 4k - mid
    rev     w9, IV_W                            // CTR[0]
    pmull   v8.1q, v6.1d, HASH2.1d              // GHASH block 4k+2 - low
    orr     x9, x11, x9, lsl #32                // CTR[0]
    ROUND   CTR2.16b, KEY3.16b
    eor     v30.8b, v30.8b, v7.8b               // GHASH block 4k+3 - mid
    ROUND   CTR1.16b, KEY4.16b

    ldp     x23, x24, [INPUT], #16              // AES[3] - load plaintext
    ROUND   CTR0.16b, KEY6.16b
    eor     HASH0.16b, HASH0.16b, v8.16b        // GHASH block 4k+2 - low
    ROUND   CTR2.16b, KEY4.16b
#ifdef HITLS_BIG_ENDIAN
    rev     x23, x23
    rev     x24, x24
#endif
    ROUND   CTR1.16b, KEY5.16b
    movi    v8.8b, #0xc2                        // MODULO - 0xc2 in each of the low 8 bytes
    pmull   v6.1q, v7.1d, HASH1.1d              // GHASH block 4k+3 - low
    eor     x24, x24, KEND1                     // AES[3] - round 12 high
    eor     v10.16b, v10.16b, v4.16b            // GHASH block 4k+1 - mid
    ROUND   CTR2.16b, KEY5.16b
    eor     x23, x23, KEND0                     // AES[3] - round 12 low

    ROUND   CTR1.16b, KEY6.16b
    shl     d8, d8, #56                         // mod_constant (0xc2 << 56)
    ROUND   CTR3.16b, KEY5.16b
    eor     v9.16b, v9.16b, v5.16b              // GHASH block 4k+3 - high
    ROUND   CTR0.16b, KEY7.16b
    fmov    d5, x19                             // AES[1] - mov low
    ROUND   CTR1.16b, KEY7.16b
    eor     v10.16b, v10.16b, v31.16b           // GHASH block 4k+2 - mid
    ROUND   CTR3.16b, KEY6.16b
    fmov    OUT1.d[1], x20                      // AES[1] - mov high

    ROUND   CTR0.16b, KEY8.16b
    eor     HASH0.16b, HASH0.16b, v6.16b        // GHASH block 4k+3 - low
    pmull   v30.1q, v30.1d, v16.1d              // GHASH block 4k+3 - mid

    subs    COUNT, COUNT, #1                    // count-- (flags stay valid to the end of the macro)
    fmov    d4, x6                              // AES[0] - mov low
    ROUND   CTR2.16b, KEY6.16b
    fmov    OUT0.d[1], x7                       // AES[0] - mov high

    ROUND   CTR1.16b, KEY8.16b
    fmov    d7, x23                             // AES[3] - mov low
    eor     v10.16b, v10.16b, v30.16b           // GHASH block 4k+3 - mid
    eor     v30.16b, HASH0.16b, v9.16b          // MODULO - karatsuba tidy up
    add     IV_W, IV_W, #1                      // CTR++
    ROUND   CTR2.16b, KEY7.16b
    fmov    OUT3.d[1], x24                      // AES[3] - mov high

    pmull   v31.1q, v9.1d, v8.1d                // MODULO - top 64b align with mid
    ext     v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
    fmov    d6, x21                             // AES[2] - mov low
    ROUND   CTR3.16b, KEY7.16b
    ROUND   CTR0.16b, KEY9.16b
    eor     v10.16b, v10.16b, v30.16b           // MODULO - karatsuba tidy up
    ROUND   CTR2.16b, KEY8.16b
    ROUND   CTR3.16b, KEY8.16b
    ROUND   CTR1.16b, KEY9.16b
    ROUND   CTR0.16b, KEY10.16b
    eor     v10.16b, v10.16b, v31.16b           // MODULO - fold into mid
    ROUND   CTR3.16b, KEY9.16b
    ROUND   CTR2.16b, KEY9.16b
    aese    CTR0.16b, KEY11.16b                 // AES[0] - round 11

    ROUND   CTR1.16b, KEY10.16b
    eor     v10.16b, v10.16b, v9.16b            // MODULO - fold into mid
    ROUND   CTR2.16b, KEY10.16b

    eor     OUT0.16b, OUT0.16b, CTR0.16b        // AES[0] - result
    fmov    d0, x10                             // CTR[0]
    aese    CTR1.16b, KEY11.16b                 // AES[1] - round 11
    fmov    CTR0.d[1], x9                       // CTR[0]--OK

    rev     w9, IV_W                            // CTR[1]
    pmull   v9.1q, v10.1d, v8.1d                // MODULO - mid 64b align with low
    fmov    OUT2.d[1], x22                      // AES[2] - mov high
    st1     {OUT0.16b}, [OUT00], #16            // AES[0] - store result

    ROUND   CTR3.16b, KEY10.16b
    orr     x9, x11, x9, lsl #32                // CTR[1]
    eor     OUT1.16b, OUT1.16b, CTR1.16b        // AES[1] - result
    add     IV_W, IV_W, #1                      // CTR++
    fmov    d1, x10                             // CTR[1]
    aese    CTR2.16b, KEY11.16b                 // AES[2] - round 11

    fmov    v1.d[1], x9                         // CTR[1]--OK
    rev     w9, IV_W                            // CTR[2]
    add     IV_W, IV_W, #1                      // CTR++
    ext     v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
    orr     x9, x11, x9, lsl #32                // CTR[2]
    st1     {OUT1.16b}, [OUT00], #16            // AES[1] - store result

    eor     HASH0.16b, HASH0.16b, v9.16b        // MODULO - fold into low
    aese    CTR3.16b, KEY11.16b                 // AES[3] - round 11
    eor     OUT2.16b, OUT2.16b, CTR2.16b        // AES[2] - result
    fmov    d2, x10                             // CTR[2]
    st1     {OUT2.16b}, [OUT00], #16            // AES[2] - store result

    fmov    CTR2.d[1], x9                       // CTR[2]--OK
    rev     w9, IV_W                            // CTR[3] (consumed at the top of the next pass)
    eor     OUT3.16b, OUT3.16b, CTR3.16b        // AES[3] - result
    eor     HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
    orr     x9, x11, x9, lsl #32                // CTR[3]
    st1     {OUT3.16b}, [OUT00], #16            // AES[3] - store result
.endm

/*
 * GCM_DEC192_LOOP
 *
 * One pass of the four-block decryption main loop. The GHASH of the four
 * ciphertext blocks already held in OUT0..OUT3 is interleaved with the AES
 * rounds for the next four counter blocks, and the next 4 x 16 bytes of
 * ciphertext are loaded from [INPUT] (post-incremented) into OUT0..OUT3.
 *
 * The output stage is skewed by two blocks. Each expansion stores, in order:
 *   - blocks 2 and 3 of the PREVIOUS group (taken from CTR2 / CTR3 on entry;
 *     CTR3 is XORed with OUT3 by the first `eor` of the macro);
 *   - blocks 0 and 1 of the CURRENT group.
 * On exit CTR2 already holds block 2 XORed with its ciphertext, and CTR3 holds
 * the AES state after the KEY11 `aese`, not yet XORed with OUT3. The code that
 * follows the loop must flush both.
 *
 * Results are moved to general registers, XORed there with KEND0/KEND1, and
 * stored with `stp` to [OUT00] (post-incremented).
 *
 * Counter handling matches GCM_ENC192_LOOP: IV_W is incremented four times and
 * each block is rebuilt as low half = x10, high half = x11 | (rev(IV_W) << 32).
 * Entry: x9 must hold the high half of the CTR2 block.
 * Exit:  x9 holds the high half of the next CTR2 block.
 *
 * `subs COUNT, COUNT, #1` sets the flags; nothing after it in the macro
 * writes them.
 *
 * Writes: x6, x7, x9, x19-x24, v0-v10 (some through aliases), v30, v31,
 *         HASH0, IV_W, COUNT, INPUT, OUT00, NZCV.
 */
.macro GCM_DEC192_LOOP
    ROUND   CTR1.16b, KEY0.16b
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // PRE 0
    pmull   v31.1q, OUT1.1d, HASH3.1d           // GHASH block 4k+1 - low
    mov     x21, CTR2.d[0]                      // AES[2] block - mov low (result of the previous pass)
    mov     x22, CTR2.d[1]                      // AES[2] block - mov high
    eor     CTR3.16b, OUT3.16b, CTR3.16b        // AES[3] block - result (previous pass)

    rev64   v7.16b, v7.16b                      // GHASH block 4k+3
    ROUND   CTR1.16b, KEY1.16b
    fmov    d2, x10                             // CTR[2] block
    ROUND   CTR0.16b, KEY0.16b
#ifdef HITLS_BIG_ENDIAN
    rev     x21, x21
    rev     x22, x22
#endif
    eor     v4.16b, v4.16b, HASH0.16b           // PRE 1
    pmull2  v30.1q, v5.2d, HASH3.2d             // GHASH block 4k+1 - high
    fmov    CTR2.d[1], x9                       // CTR[2]--OK (x9 from the previous pass)

    ROUND   CTR1.16b, KEY2.16b
    mov     x24, CTR3.d[1]                      // AES[3] block - mov high
    ROUND   CTR0.16b, KEY1.16b
    mov     x23, CTR3.d[0]                      // AES[3] block - mov low

    pmull2  v9.1q, v4.2d, HASH4.2d              // GHASH block 4k - high
    fmov    d3, x10                             // CTR[3]
    mov     d8, v4.d[1]                         // GHASH block 4k - mid
    pmull   HASH0.1q, v4.1d, HASH4.1d           // GHASH block 4k - low
#ifdef HITLS_BIG_ENDIAN
    rev     x23, x23
    rev     x24, x24
#endif
    mov     d10, v17.d[1]                       // GHASH block 4k - mid
    rev     w9, IV_W                            // CTR[3]
    ROUND   CTR2.16b, KEY0.16b
    orr     x9, x11, x9, lsl #32                // CTR[3]
    fmov    CTR3.d[1], x9                       // CTR[3]--OK

    eor     v8.8b, v8.8b, v4.8b                 // GHASH block 4k - mid
    mov     d4, v5.d[1]                         // GHASH block 4k+1 - mid
    ROUND   CTR1.16b, KEY3.16b
    ROUND   CTR0.16b, KEY2.16b
    eor     x22, x22, KEND1                     // AES[2] block - round 12 high

    ROUND   CTR2.16b, KEY1.16b
    eor     v4.8b, v4.8b, v5.8b                 // GHASH block 4k+1 - mid
    pmull   v10.1q, v8.1d, v10.1d               // GHASH block 4k - mid
    ROUND   CTR3.16b, KEY0.16b
    rev64   v6.16b, v6.16b                      // GHASH block 4k+2
    ROUND   CTR2.16b, KEY2.16b
    pmull   v4.1q, v4.1d, v17.1d                // GHASH block 4k+1 - mid
    eor     HASH0.16b, HASH0.16b, v31.16b       // GHASH block 4k+1 - low
    eor     x21, x21, KEND0                     // AES[2] block - round 12 low

    ROUND   CTR1.16b, KEY4.16b
    ROUND   CTR0.16b, KEY3.16b
    eor     v10.16b, v10.16b, v4.16b            // GHASH block 4k+1 - mid
    mov     d31, v6.d[1]                        // GHASH block 4k+2 - mid
    ROUND   CTR3.16b, KEY1.16b
    eor     v9.16b, v9.16b, v30.16b             // GHASH block 4k+1 - high
    ROUND   CTR0.16b, KEY4.16b
    pmull2  v30.1q, v6.2d, HASH2.2d             // GHASH block 4k+2 - high
    eor     v31.8b, v31.8b, v6.8b               // GHASH block 4k+2 - mid
    pmull   v8.1q, v6.1d, HASH2.1d              // GHASH block 4k+2 - low
    ROUND   CTR0.16b, KEY5.16b
    eor     v9.16b, v9.16b, v30.16b             // GHASH block 4k+2 - high
    mov     d30, v7.d[1]                        // GHASH block 4k+3 - mid
    ROUND   CTR1.16b, KEY5.16b
    pmull2  v5.1q, v7.2d, HASH1.2d              // GHASH block 4k+3 - high
    ROUND   CTR3.16b, KEY2.16b
    eor     v30.8b, v30.8b, v7.8b               // GHASH block 4k+3 - mid
    ROUND   CTR1.16b, KEY6.16b
    ROUND   CTR0.16b, KEY6.16b
    ins     v31.d[1], v31.d[0]                  // GHASH block 4k+2 - mid
    ROUND   CTR3.16b, KEY3.16b
    pmull   v30.1q, v30.1d, v16.1d              // GHASH block 4k+3 - mid
    eor     HASH0.16b, HASH0.16b, v8.16b        // GHASH block 4k+2 - low
    ROUND   CTR0.16b, KEY7.16b
    pmull2  v31.1q, v31.2d, v16.2d              // GHASH block 4k+2 - mid
    eor     v9.16b, v9.16b, v5.16b              // GHASH block 4k+3 - high
    ROUND   CTR1.16b, KEY7.16b
    ROUND   CTR0.16b, KEY8.16b
    movi    v8.8b, #0xc2                        // MODULO - 0xc2 in each of the low 8 bytes
    pmull   v6.1q, v7.1d, HASH1.1d              // GHASH block 4k+3 - low
    ROUND   CTR1.16b, KEY8.16b
    eor     v10.16b, v10.16b, v31.16b           // GHASH block 4k+2 - mid
    ROUND   CTR2.16b, KEY3.16b
    ROUND   CTR0.16b, KEY9.16b
    eor     HASH0.16b, HASH0.16b, v6.16b        // GHASH block 4k+3 - low
    ROUND   CTR3.16b, KEY4.16b
    ROUND   CTR2.16b, KEY4.16b
    eor     v10.16b, v10.16b, v30.16b           // GHASH block 4k+3 - mid
    ROUND   CTR0.16b, KEY10.16b
    ROUND   CTR1.16b, KEY9.16b
    eor     v30.16b, HASH0.16b, v9.16b          // MODULO - karatsuba tidy up
    ROUND   CTR2.16b, KEY5.16b
    ROUND   CTR3.16b, KEY5.16b
    shl     d8, d8, #56                         // mod_constant (0xc2 << 56)
    ROUND   CTR1.16b, KEY10.16b
    ROUND   CTR2.16b, KEY6.16b
    ld1     {OUT0.16b}, [INPUT], #16            // AES load[0] ciphertext
    ROUND   CTR3.16b, KEY6.16b
    eor     v10.16b, v10.16b, v30.16b           // MODULO - karatsuba tidy up
    pmull   v31.1q, v9.1d, v8.1d                // MODULO - top 64b align with mid
    ld1     {OUT1.16b}, [INPUT], #16            // AES load[1] ciphertext
    eor     x23, x23, KEND0                     // AES[3] block - round 12 low
    ROUND   CTR2.16b, KEY7.16b
    ext     v9.16b, v9.16b, v9.16b, #8          // MODULO - other top alignment
    aese    CTR0.16b, KEY11.16b                 // AES[0] - round 11
    add     IV_W, IV_W, #1                      // CTR++
    ROUND   CTR3.16b, KEY7.16b
    eor     v10.16b, v10.16b, v31.16b           // MODULO - fold into mid
    ld1     {OUT2.16b}, [INPUT], #16            // AES load[2] ciphertext
    ROUND   CTR2.16b, KEY8.16b
    aese    CTR1.16b, KEY11.16b                 // AES[1] - round 11
    ld1     {OUT3.16b}, [INPUT], #16            // AES load[3] ciphertext
    rev     w9, IV_W                            // CTR block 4k+8
    ROUND   CTR3.16b, KEY8.16b

    stp     x21, x22, [OUT00], #16              // AES[2] block - store result (previous pass)
    ROUND   CTR2.16b, KEY9.16b
    eor     v10.16b, v10.16b, v9.16b            // MODULO - fold into mid

    subs    COUNT, COUNT, #1                    // COUNT-- (flags stay valid to the end of the macro)
    eor     CTR0.16b, OUT0.16b, CTR0.16b        // AES[0] block - result
    eor     x24, x24, KEND1                     // AES[3] block - round 12 high
    eor     CTR1.16b, OUT1.16b, CTR1.16b        // AES[1] block - result
    ROUND   CTR2.16b, KEY10.16b
    orr     x9, x11, x9, lsl #32                // CTR block 4k+8
    ROUND   CTR3.16b, KEY9.16b
    pmull   v8.1q, v10.1d, v8.1d                // MODULO - mid 64b align with low
    mov     x19, CTR1.d[0]                      // AES[1] block - mov low
    mov     x6, CTR0.d[0]                       // AES[0] block - mov low

    stp     x23, x24, [OUT00], #16              // AES[3] - store result (previous pass)
    rev64   v5.16b, v5.16b                      // GHASH block 4k+5 (block 1 of the group just loaded)
    aese    CTR2.16b, KEY11.16b                 // AES[2] - round 11
    mov     x7, CTR0.d[1]                       // AES[0] block - mov high
    ROUND   CTR3.16b, KEY10.16b
    mov     x20, CTR1.d[1]                      // AES[1] block - mov high
#ifdef HITLS_BIG_ENDIAN
    rev     x6, x6
    rev     x7, x7
    rev     x19, x19
    rev     x20, x20
#endif
    fmov    d0, x10                             // CTR[0]
    add     IV_W, IV_W, #1                      // CTR++
    ext     v10.16b, v10.16b, v10.16b, #8       // MODULO - other mid alignment
    eor     CTR2.16b, OUT2.16b, CTR2.16b        // AES[2] block - result (stored by the next pass)
    fmov    CTR0.d[1], x9                       // CTR[0]--OK
    rev     w9, IV_W                            // CTR block 4k+9
    eor     x6, x6, KEND0                       // AES[0] block - round 12 low
    orr     x9, x11, x9, lsl #32                // CTR block 4k+9
    eor     HASH0.16b, HASH0.16b, v8.16b        // MODULO - fold into low
    fmov    d1, x10                             // CTR[1]
    add     IV_W, IV_W, #1                      // CTR++
    eor     x19, x19, KEND0                     // AES[1] block - round 12 low
    fmov    CTR1.d[1], x9                       // CTR[1]--OK
    rev     w9, IV_W                            // CTR block 4k+10
    eor     x20, x20, KEND1                     // AES[1] - round 12 high
    eor     x7, x7, KEND1                       // AES[0] - round 12 high

    stp     x6, x7, [OUT00], #16                // AES[0] block - store result
    eor     HASH0.16b, HASH0.16b, v10.16b       // MODULO - fold into low
    add     IV_W, IV_W, #1                      // CTR++
    rev64   v4.16b, v4.16b                      // GHASH block 4k+4 (block 0 of the group just loaded)
    orr     x9, x11, x9, lsl #32                // CTR block 4k+10
    aese    CTR3.16b, KEY11.16b                 // AES[3] - round 11
    stp     x19, x20, [OUT00], #16              // AES[1] block - store result
.endm

#endif