/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */
#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)

/*
 * GCM_ENC128_LOOP - one pass of the 4-block AES-CTR encrypt + GHASH loop body.
 *
 * What one expansion does, as far as the instructions below show:
 *   - Loads four 16-byte plaintext blocks from [INPUT] (post-incremented by 16
 *     each) into x6/x7, x19/x20, x21/x22 and x23/x24, byte-swaps them when
 *     HITLS_BIG_ENDIAN is defined, and XORs them with KEND0 (low half) and
 *     KEND1 (high half) in general registers.
 *   - Runs CTR0-CTR3 through ROUND with nine round keys and a final bare aese
 *     with KEY9, XORs the result into the blocks above and stores the four
 *     output blocks to [OUT00] (post-incremented by 16 each).
 *   - Byte-reverses (rev64) the values held in OUT0-OUT3 on entry and folds
 *     them into HASH0 using HASH4, HASH3, HASH2, HASH1 (in that block order)
 *     plus v16/v17 for the middle products, then reduces with the constant
 *     0xc2 << 56 built in v8.
 *   - Rebuilds CTR3, CTR0, CTR1 and CTR2 as { low = x10, high = x9 }, where
 *     x9 = x11 | (rev(IV_W) << 32); IV_W is incremented four times. On exit x9
 *     already holds the high half for the next CTR3.
 *   - Executes subs COUNT, COUNT, #1; no instruction visible after it writes
 *     the condition flags.
 *
 * Scratch registers written here: x6, x7, x9, x19-x24, v8-v10, v28-v31.
 *
 * NOTE(review): x19-x24 and v8-v10 are callee-saved under AAPCS64; the
 *   enclosing function presumably saves/restores them - not visible here.
 * NOTE(review): ROUND and all register aliases (INPUT, OUT00, COUNT, IV_W,
 *   CTRn, OUTn, HASHn, KEYn, KENDn) are defined outside this chunk. ROUND is
 *   presumably one aese + aesmc pair - confirm.
 * NOTE(review): raw names are mixed with aliases. v4-v7 appear to be
 *   OUT0-OUT3, d0-d3 the low halves of CTR0-CTR3, and v19-v25 appear to be
 *   KEY1-KEY7 (CTR3 is the only block keyed through raw names) - confirm
 *   against the alias definitions before renaming anything.
 */
.macro GCM_ENC128_LOOP
    ldp     x6, x7, [INPUT], #16                    // AES[0] - load plaintext
    rev64   OUT0.16b, OUT0.16b                      // GHASH block[0]
    rev64   OUT2.16b, OUT2.16b                      // GHASH block[2]
    ROUND   CTR2.16b, KEY0.16b
#ifdef HITLS_BIG_ENDIAN
    rev     x6, x6
    rev     x7, x7
#endif

    fmov    d3, x10                                 // CTR[3] - low 64 bits
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8     // PRE 0 - swap the 64-bit halves of the accumulator
    rev64   OUT1.16b, OUT1.16b                      // GHASH block[1]
    ROUND   CTR1.16b, KEY0.16b

    add     IV_W, IV_W, #1                          // CTR3++
    fmov    CTR3.d[1], x9                           // CTR[3] - high 64 bits, counter block complete
    ROUND   CTR0.16b, KEY0.16b
    mov     d31, OUT2.d[1]                          // GHASH block[2] - high half, for mid term

    ROUND   CTR2.16b, KEY1.16b
    mov     d30, OUT1.d[1]                          // GHASH block[1] - high half, for mid term
    ROUND   CTR1.16b, KEY1.16b
    eor     v4.16b, OUT0.16b, HASH0.16b             // PRE 1 - fold accumulator into block[0]

    ROUND   CTR3.16b, KEY0.16b
    eor     x7, x7, KEND1                           // AES[0] - round 10 high
    pmull2  v28.1q, OUT1.2d, HASH3.2d               // GHASH block 4k+1 - high
    eor     v31.8b, v31.8b, OUT2.8b                 // GHASH[2] - mid

    ldp     x19, x20, [INPUT], #16                  // AES[1] - load plaintext
    ROUND   CTR0.16b, KEY1.16b
    rev     w9, IV_W                                // CTR0 - start: byte-swapped 32-bit counter
    eor     v30.8b, v30.8b, OUT1.8b                 // GHASH block 4k+1 - mid
#ifdef HITLS_BIG_ENDIAN
    rev     x19, x19
    rev     x20, x20
#endif

    mov     d8, v4.d[1]                             // GHASH block 4k - mid
    orr     x9, x11, x9, lsl #32                    // CTR0 - high 64 bits = x11 | (counter << 32)
    pmull2  v9.1q, v4.2d, HASH4.2d                  // GHASH block 4k - high
    add     IV_W, IV_W, #1                          // CTR0++

    mov     d10, v17.d[1]                           // GHASH block 4k - mid (high half of v17)
    ROUND   CTR0.16b, KEY2.16b
    pmull   HASH0.1q, v4.1d, HASH4.1d               // GHASH block 4k - low
    eor     v8.8b, v8.8b, v4.8b                     // GHASH block 4k - mid

    ROUND   CTR1.16b, KEY2.16b
    ROUND   CTR0.16b, KEY3.16b
    eor     v9.16b, v9.16b, v28.16b                 // GHASH block 4k+1 - high
    pmull   v28.1q, OUT2.1d, HASH2.1d               // GHASH[2] - low

    pmull   v10.1q, v8.1d, v10.1d                   // GHASH block 4k - mid
    rev64   OUT3.16b, OUT3.16b                      // GHASH block[3]
    pmull   v30.1q, v30.1d, v17.1d                  // GHASH block 4k+1 - mid
    pmull   v29.1q, OUT1.1d, HASH3.1d               // GHASH block 4k+1 - low

    ins     v31.d[1], v31.d[0]                      // GHASH[2] - mid, copy to high lane for pmull2
    pmull2  v8.1q, OUT2.2d, HASH2.2d                // GHASH[2] - high
    eor     x20, x20, KEND1                         // AES[1] - round 10 high
    eor     v10.16b, v10.16b, v30.16b               // GHASH block 4k+1 - mid

    mov     d30, OUT3.d[1]                          // GHASH[3] - mid
    ROUND   CTR3.16b, v19.16b
    eor     HASH0.16b, HASH0.16b, v29.16b           // GHASH block 4k+1 - low
    ROUND   CTR2.16b, KEY2.16b

    eor     x6, x6, KEND0                           // AES[0] - round 10 low
    ROUND   CTR1.16b, KEY3.16b
    eor     v30.8b, v30.8b, OUT3.8b                 // GHASH[3] - mid
    pmull2  v4.1q, OUT3.2d, HASH1.2d                // GHASH[3] - high (v4 reused as a temporary)

    ROUND   CTR2.16b, KEY3.16b
    eor     v9.16b, v9.16b, v8.16b                  // GHASH[2] - high
    pmull2  v31.1q, v31.2d, v16.2d                  // GHASH[2] - mid
    pmull   v29.1q, OUT3.1d, HASH1.1d               // GHASH[3] - low

    movi    v8.8b, #0xc2                            // mod_constant - low 8 bytes all 0xc2
    pmull   v30.1q, v30.1d, v16.1d                  // GHASH[3] - mid
    eor     HASH0.16b, HASH0.16b, v28.16b           // GHASH[2] - low
    ROUND   CTR1.16b, KEY4.16b

    ROUND   CTR3.16b, v20.16b
    shl     d8, d8, #56                             // mod_constant = 0xc2 << 56
    ROUND   CTR0.16b, KEY4.16b
    eor     v9.16b, v9.16b, v4.16b                  // GHASH[3] - high

    ROUND   CTR1.16b, KEY5.16b
    ldp     x21, x22, [INPUT], #16                  // AES[2] - load plaintext
    ROUND   CTR3.16b, v21.16b
    eor     v10.16b, v10.16b, v31.16b               // GHASH[2] - mid
#ifdef HITLS_BIG_ENDIAN
    rev     x21, x21
    rev     x22, x22
#endif

    ROUND   CTR0.16b, KEY5.16b
    ldp     x23, x24, [INPUT], #16                  // AES[3] - load plaintext
    pmull   v31.1q, v9.1d, v8.1d                    // MODULO - top 64b align with mid
    eor     HASH0.16b, HASH0.16b, v29.16b           // GHASH[3] - low
#ifdef HITLS_BIG_ENDIAN
    rev     x23, x23
    rev     x24, x24
#endif

    ROUND   CTR2.16b, KEY4.16b
    eor     x19, x19, KEND0                         // AES[1] - round 10 low
    ROUND   CTR3.16b, v22.16b
    eor     v10.16b, v10.16b, v30.16b               // GHASH[3] - mid

    ROUND   CTR1.16b, KEY6.16b
    eor     x23, x23, KEND0                         // AES[3] - round 10 low
    ROUND   CTR2.16b, KEY5.16b
    eor     v30.16b, HASH0.16b, v9.16b              // MODULO - karatsuba tidy up

    fmov    d4, x6                                  // AES[0] - mov low
    ROUND   CTR0.16b, KEY6.16b
    fmov    OUT0.d[1], x7                           // AES[0] - mov high
    fmov    d7, x23                                 // AES[3] - mov low

    ext     v9.16b, v9.16b, v9.16b, #8              // MODULO - other top alignment
    ROUND   CTR3.16b, v23.16b
    fmov    d5, x19                                 // AES[1] - mov low
    ROUND   CTR0.16b, KEY7.16b

    eor     v10.16b, v10.16b, v30.16b               // MODULO - karatsuba tidy up
    ROUND   CTR2.16b, KEY6.16b
    eor     x24, x24, KEND1                         // AES[3] - round 10 high
    ROUND   CTR1.16b, KEY7.16b

    fmov    OUT1.d[1], x20                          // AES[1] - mov high
    ROUND   CTR0.16b, KEY8.16b
    fmov    OUT3.d[1], x24                          // AES[3] - mov high
    ROUND   CTR3.16b, v24.16b

    subs    COUNT, COUNT, #1                        // count-- (sets flags; nothing below in this macro rewrites them)
    ROUND   CTR1.16b, KEY8.16b
    eor     v10.16b, v10.16b, v31.16b               // MODULO - fold into mid
    aese    CTR0.16b, KEY9.16b                      // AES[0] - round 9

    eor     x21, x21, KEND0                         // AES[2] - round 10 low
    eor     x22, x22, KEND1                         // AES[2] - round 10 high
    ROUND   CTR3.16b, v25.16b
    fmov    d6, x21                                 // AES[2] - mov low

    aese    CTR1.16b, KEY9.16b                      // AES[1] - round 9
    fmov    OUT2.d[1], x22                          // AES[2] - mov high
    ROUND   CTR2.16b, KEY7.16b
    eor     OUT0.16b, OUT0.16b, CTR0.16b            // AES[0] - result

    fmov    d0, x10                                 // CTR0 - low 64 bits (CTR0 is free after the eor above)
    ROUND   CTR3.16b, KEY8.16b
    fmov    CTR0.d[1], x9                           // CTR0 - high 64 bits, counter block complete
    rev     w9, IV_W                                // CTR1 - start

    eor     v10.16b, v10.16b, v9.16b                // MODULO - fold into mid
    ROUND   CTR2.16b, KEY8.16b
    eor     OUT1.16b, OUT1.16b, CTR1.16b            // AES[1] - result
    add     IV_W, IV_W, #1                          // CTR1++

    orr     x9, x11, x9, lsl #32                    // CTR1 - high 64 bits
    fmov    d1, x10                                 // CTR1 - low 64 bits
    pmull   v9.1q, v10.1d, v8.1d                    // MODULO - mid 64b align with low
    fmov    CTR1.d[1], x9                           // CTR1 - high 64 bits, counter block complete

    rev     w9, IV_W                                // CTR2 - start
    aese    CTR2.16b, KEY9.16b                      // AES[2] - round 9
    st1     {OUT0.16b}, [OUT00], #16                // Write back - OUT0
    eor     OUT2.16b, OUT2.16b, CTR2.16b            // AES[2] - result

    orr     x9, x11, x9, lsl #32                    // CTR2 - high 64 bits
    aese    CTR3.16b, KEY9.16b                      // AES[3] - round 9
    add     IV_W, IV_W, #1                          // CTR2++
    ext     v10.16b, v10.16b, v10.16b, #8           // MODULO - other mid alignment
    fmov    d2, x10                                 // CTR2 - low 64 bits

    eor     HASH0.16b, HASH0.16b, v9.16b            // MODULO - fold into low
    st1     {OUT1.16b}, [OUT00], #16                // Write back - OUT1
    fmov    CTR2.d[1], x9                           // CTR2 - high 64 bits, counter block complete
    st1     {OUT2.16b}, [OUT00], #16                // Write back - OUT2

    rev     w9, IV_W                                // CTR3 - start (consumed by the next expansion)
    eor     OUT3.16b, OUT3.16b, CTR3.16b            // AES[3] - result
    orr     x9, x11, x9, lsl #32                    // CTR3 - high 64 bits, left in x9 on exit
    eor     HASH0.16b, HASH0.16b, v10.16b           // MODULO - fold into low
    st1     {OUT3.16b}, [OUT00], #16                // Write back - OUT3
.endm

/*
 * GCM_DEC128_LOOP - one pass of the 4-block AES-CTR decrypt + GHASH loop body.
 *
 * The macro is software-pipelined across expansions. As far as the
 * instructions below show:
 *   - On entry CTR2 is read directly as a finished output block and CTR3 is
 *     XORed with OUT3 first; both are then moved to x21/x22 and x23/x24,
 *     byte-swapped when HITLS_BIG_ENDIAN is defined, XORed with KEND0/KEND1
 *     and stored to [OUT00]. These are blocks 2 and 3 of the previous group.
 *   - The values held in v4-v7 on entry are folded into HASH0 using HASH4,
 *     HASH3, HASH2, HASH1 (in that block order) plus v16/v17 for the middle
 *     products, then reduced with the constant 0xc2 << 56 built in v8.
 *     v4 and v5 are consumed as-is, OUT2 and v7 are byte-reversed (rev64)
 *     here; OUT1 and OUT0 are byte-reversed near the end for the next pass.
 *   - Four new 16-byte input blocks are loaded from [INPUT] into OUT0-OUT3
 *     once the GHASH reads of the old contents are done.
 *   - CTR0-CTR3 go through ROUND with KEY0-KEY8 and a final bare aese with
 *     KEY9. Blocks 0 and 1 are completed and stored in this pass; CTR2 is
 *     left XORed with OUT2 and CTR3 is left after its aese for the next pass.
 *   - Counter blocks are rebuilt as { low = x10, high = x9 } with
 *     x9 = x11 | (rev(IV_W) << 32); IV_W is incremented four times and x9 is
 *     left holding the high half for the next v2 rebuild.
 *   - Executes subs COUNT, COUNT, #1; no instruction visible after it writes
 *     the condition flags.
 *
 * Scratch registers written here: x6, x7, x9, x19-x24, v8-v10, v28-v31.
 *
 * NOTE(review): x19-x24 and v8-v10 are callee-saved under AAPCS64; the
 *   enclosing function presumably saves/restores them - not visible here.
 * NOTE(review): ROUND and all register aliases are defined outside this
 *   chunk. Raw v0-v3 / v4-v7 appear to be CTR0-CTR3 / OUT0-OUT3 - confirm
 *   against the alias definitions.
 * NOTE(review): the pipelining implies set-up before the first expansion and
 *   a drain after the last one; that code is not part of this chunk.
 */
.macro GCM_DEC128_LOOP
    eor     CTR3.16b, OUT3.16b, CTR3.16b            // AES[3] - result
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8     // PRE 0 - swap the 64-bit halves of the accumulator
    mov     x21, CTR2.d[0]                          // AES[2] - mov low
    pmull2  v28.1q, v5.2d, HASH3.2d                 // GHASH block 4k+1 - high
    mov     x22, CTR2.d[1]                          // AES[2] - mov high
    ROUND   CTR1.16b, KEY0.16b
    fmov    d2, x10                                 // CTR[2] - low 64 bits (old contents saved in x21/x22)
#ifdef HITLS_BIG_ENDIAN
    rev     x21, x21
    rev     x22, x22
#endif
    rev64   OUT2.16b, OUT2.16b                      // GHASH[2]
    fmov    v2.d[1], x9                             // CTR[2] - high 64 bits
    rev     w9, IV_W                                // CTR[3] - start: byte-swapped 32-bit counter
    mov     x23, CTR3.d[0]                          // AES[3] - mov low
    eor     v4.16b, v4.16b, HASH0.16b               // PRE 1 - fold accumulator into block[0]
    mov     d30, v5.d[1]                            // GHASH block 4k+1 - mid
    ROUND   CTR1.16b, KEY1.16b
    rev64   v7.16b, v7.16b                          // GHASH[3]
    pmull   v29.1q, v5.1d, HASH3.1d                 // GHASH block 4k+1 - low
    mov     x24, CTR3.d[1]                          // AES[3] - mov high
    orr     x9, x11, x9, lsl #32                    // CTR[3] - high 64 bits = x11 | (counter << 32)
    pmull   HASH0.1q, v4.1d, HASH4.1d               // GHASH block 4k - low
#ifdef HITLS_BIG_ENDIAN
    rev     x23, x23
    rev     x24, x24
#endif
    fmov    d3, x10                                 // CTR[3] - low 64 bits (old contents saved in x23/x24)
    eor     v30.8b, v30.8b, v5.8b                   // GHASH block 4k+1 - mid
    ROUND   CTR1.16b, KEY2.16b
    fmov    v3.d[1], x9                             // CTR[3] - high 64 bits
    ROUND   CTR2.16b, KEY0.16b
    mov     d10, v17.d[1]                           // GHASH block 4k - mid (high half of v17)
    pmull2  v9.1q, v4.2d, HASH4.2d                  // GHASH block 4k - high
    eor     HASH0.16b, HASH0.16b, v29.16b           // GHASH block 4k+1 - low
    pmull   v29.1q, v7.1d, HASH1.1d                 // GHASH[3] - low
    ROUND   CTR1.16b, KEY3.16b
    mov     d8, v4.d[1]                             // GHASH block 4k - mid
    ROUND   CTR3.16b, KEY0.16b
    eor     v9.16b, v9.16b, v28.16b                 // GHASH block 4k+1 - high
    ROUND   CTR0.16b, KEY0.16b
    pmull   v28.1q, v6.1d, HASH2.1d                 // GHASH[2] - low
    eor     v8.8b, v8.8b, v4.8b                     // GHASH block 4k - mid
    ROUND   CTR3.16b, KEY1.16b
    eor     x23, x23, KEND0                         // AES[3] - round 10 low
    pmull   v30.1q, v30.1d, v17.1d                  // GHASH block 4k+1 - mid
    eor     x22, x22, KEND1                         // AES[2] - round 10 high
    mov     d31, v6.d[1]                            // GHASH[2] - mid
    ROUND   CTR0.16b, KEY1.16b
    eor     HASH0.16b, HASH0.16b, v28.16b           // GHASH[2] - low
    pmull   v10.1q, v8.1d, v10.1d                   // GHASH block 4k - mid
    ROUND   CTR3.16b, KEY2.16b
    eor     v31.8b, v31.8b, v6.8b                   // GHASH[2] - mid
    ROUND   CTR0.16b, KEY2.16b
    ROUND   CTR1.16b, KEY4.16b
    eor     v10.16b, v10.16b, v30.16b               // GHASH block 4k+1 - mid
    pmull2  v8.1q, v6.2d, HASH2.2d                  // GHASH[2] - high
    ROUND   CTR0.16b, KEY3.16b
    ins     v31.d[1], v31.d[0]                      // GHASH[2] - mid, copy to high lane for pmull2
    pmull2  v4.1q, v7.2d, HASH1.2d                  // GHASH[3] - high (v4 reused as a temporary)
    ROUND   CTR2.16b, KEY1.16b
    mov     d30, v7.d[1]                            // GHASH[3] - mid
    ROUND   CTR0.16b, KEY4.16b
    eor     v9.16b, v9.16b, v8.16b                  // GHASH[2] - high
    pmull2  v31.1q, v31.2d, v16.2d                  // GHASH[2] - mid
    eor     x24, x24, KEND1                         // AES[3] - round 10 high
    ROUND   CTR2.16b, KEY2.16b
    eor     v30.8b, v30.8b, v7.8b                   // GHASH[3] - mid
    ROUND   CTR1.16b, KEY5.16b
    eor     x21, x21, KEND0                         // AES[2] - round 10 low
    ROUND   CTR0.16b, KEY5.16b
    movi    v8.8b, #0xc2                            // mod_constant - low 8 bytes all 0xc2
    ROUND   CTR2.16b, KEY3.16b
    eor     HASH0.16b, HASH0.16b, v29.16b           // GHASH[3] - low
    ROUND   CTR1.16b, KEY6.16b
    ROUND   CTR0.16b, KEY6.16b
    eor     v10.16b, v10.16b, v31.16b               // GHASH[2] - mid
    ROUND   CTR2.16b, KEY4.16b
    stp     x21, x22, [OUT00], #16                  // AES[2] - store result
    pmull   v30.1q, v30.1d, v16.1d                  // GHASH[3] - mid
    eor     v9.16b, v9.16b, v4.16b                  // GHASH[3] - high (last read of v4 before reload)

    ld1     {OUT0.16b}, [INPUT], #16                // AES[0] - load ciphertext

    ROUND   CTR1.16b, KEY7.16b
    add     IV_W, IV_W, #1                          // CTR++
    ROUND   CTR0.16b, KEY7.16b
    shl     d8, d8, #56                             // mod_constant = 0xc2 << 56
    ROUND   CTR2.16b, KEY5.16b
    eor     v10.16b, v10.16b, v30.16b               // GHASH[3] - mid
    ROUND   CTR1.16b, KEY8.16b
    stp     x23, x24, [OUT00], #16                  // AES[3] - store result
    ROUND   CTR0.16b, KEY8.16b
    eor     v30.16b, HASH0.16b, v9.16b              // MODULO - karatsuba tidy up
    ROUND   CTR3.16b, KEY3.16b
    rev     w9, IV_W                                // CTR block 4k+8
    pmull   v31.1q, v9.1d, v8.1d                    // MODULO - top 64b align with mid
    ld1     {OUT1.16b}, [INPUT], #16                // AES[1] - load
    ext     v9.16b, v9.16b, v9.16b, #8              // MODULO - other top alignment
    aese    CTR0.16b, KEY9.16b                      // AES[0] - round 9
    orr     x9, x11, x9, lsl #32                    // CTR block 4k+8
    ROUND   CTR3.16b, KEY4.16b
    eor     v10.16b, v10.16b, v30.16b               // MODULO - karatsuba tidy up
    aese    CTR1.16b, KEY9.16b                      // AES[1] - round 9
    ROUND   CTR2.16b, KEY6.16b

    eor     CTR0.16b, OUT0.16b, CTR0.16b            // AES[0] - result
    ROUND   CTR3.16b, KEY5.16b
    ld1     {OUT2.16b}, [INPUT], #16                // AES[2] - load
    add     IV_W, IV_W, #1                          // CTR++
    eor     v10.16b, v10.16b, v31.16b               // MODULO - fold into mid
    eor     CTR1.16b, OUT1.16b, CTR1.16b            // AES[1] - result
    ROUND   CTR2.16b, KEY7.16b
    ld1     {OUT3.16b}, [INPUT], #16                // AES[3] - load
    ROUND   CTR3.16b, KEY6.16b

    rev64   OUT1.16b, OUT1.16b                      // GHASH block[1] - prepared for the next expansion
    eor     v10.16b, v10.16b, v9.16b                // MODULO - fold into mid
    mov     x7, CTR0.d[1]                           // AES[0] - mov high
    ROUND   CTR2.16b, KEY8.16b
    mov     x6, CTR0.d[0]                           // AES[0] - mov low

    ROUND   CTR3.16b, KEY7.16b
    fmov    d0, x10                                 // CTR[0] - low 64 bits (old contents saved in x6/x7)
#ifdef HITLS_BIG_ENDIAN
    rev     x7, x7
    rev     x6, x6
#endif
    pmull   v8.1q, v10.1d, v8.1d                    // MODULO - mid 64b align with low
    fmov    CTR0.d[1], x9                           // CTR[0] - OK
    rev     w9, IV_W                                // CTR block 4k+9
    aese    CTR2.16b, KEY9.16b                      // AES[2] - round 9
    orr     x9, x11, x9, lsl #32                    // CTR block 4k+9

    ext     v10.16b, v10.16b, v10.16b, #8           // MODULO - other mid alignment

    ROUND   CTR3.16b, KEY8.16b

    eor     x7, x7, KEND1                           // AES[0] - round 10 high
    eor     HASH0.16b, HASH0.16b, v8.16b            // MODULO - fold into low
    mov     x20, CTR1.d[1]                          // AES[1] - mov high
    eor     x6, x6, KEND0                           // AES[0] - round 10 low
    eor     CTR2.16b, OUT2.16b, CTR2.16b            // AES[2] - result (stored by the next expansion)
    mov     x19, CTR1.d[0]                          // AES[1] - mov low
    add     IV_W, IV_W, #1                          // CTR++
    aese    CTR3.16b, KEY9.16b                      // AES[3] - round 9 (XOR with OUT3 happens at the next entry)
    fmov    d1, x10                                 // CTR[1] - low 64 bits (old contents saved in x19/x20)
#ifdef HITLS_BIG_ENDIAN
    rev     x20, x20
    rev     x19, x19
#endif
    subs    COUNT, COUNT, #1                        // COUNT-- (sets flags; nothing below in this macro rewrites them)
    rev64   OUT0.16b, OUT0.16b                      // GHASH block[0] - prepared for the next expansion
    eor     HASH0.16b, HASH0.16b, v10.16b           // MODULO - fold into low

    fmov    v1.d[1], x9                             // CTR[1] - OK
    rev     w9, IV_W                                // CTR block 4k+10
    add     IV_W, IV_W, #1                          // CTR++

    eor     x20, x20, KEND1                         // AES[1] - round 10 high
    stp     x6, x7, [OUT00], #16                    // AES[0] - store result
    eor     x19, x19, KEND0                         // AES[1] - round 10 low
    stp     x19, x20, [OUT00], #16                  // AES[1] - store result
    orr     x9, x11, x9, lsl #32                    // CTR block 4k+10 - high 64 bits, left in x9 on exit
.endm

#endif