/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)

#include "crypt_arm.h"
#include "aes_gcm_common_aarch64.S"
#include "aes128_gcm_aarch64.S"
#include "aes192_gcm_aarch64.S"
#include "aes256_gcm_aarch64.S"

/*
 * Target: AArch64 (armv8-a + crypto extension), GNU as syntax, preprocessed by cpp.
 * Register aliases (KEYn, HASHn, CTRn, OUTn, IVEC0, KEY00, INLEN, COUNT, ...) and the
 * helper macros (IN_STP, OUT_STP, LOAD_KEY, ROUND4, GCM_ENCxxx_LOOP, ...) come from the
 * files included above and are not defined here.
 */

.text
.arch armv8-a+crypto

/*
 * AES_GCM_ClearAsm
 *
 * Zeroes the vector registers that the routines in this file load with AES round keys
 * (KEY0..KEY13) and the HASH0..HASH4 registers, so that no key or hash material is left
 * behind in SIMD registers. Takes no arguments and touches no memory.
 */
.globl  AES_GCM_ClearAsm
.type   AES_GCM_ClearAsm,%function
.align  4
AES_GCM_ClearAsm:
AARCH64_PACIASP
    eor     KEY0.16b, KEY0.16b, KEY0.16b
    eor     KEY1.16b, KEY1.16b, KEY1.16b
    eor     KEY2.16b, KEY2.16b, KEY2.16b
    eor     KEY3.16b, KEY3.16b, KEY3.16b
    eor     KEY4.16b, KEY4.16b, KEY4.16b
    eor     KEY5.16b, KEY5.16b, KEY5.16b
    eor     KEY6.16b, KEY6.16b, KEY6.16b
    eor     KEY7.16b, KEY7.16b, KEY7.16b
    eor     KEY8.16b, KEY8.16b, KEY8.16b
    eor     KEY9.16b, KEY9.16b, KEY9.16b
    eor     KEY10.16b, KEY10.16b, KEY10.16b
    // KEY11..KEY13 are loaded with round keys on the AES-192/AES-256 paths below,
    // so they are wiped as well.
    // NOTE(review): assumes KEY11..KEY13 are caller-saved like KEY0..KEY10 - confirm
    // against the register map in aes_gcm_common_aarch64.S.
    eor     KEY11.16b, KEY11.16b, KEY11.16b
    eor     KEY12.16b, KEY12.16b, KEY12.16b
    eor     KEY13.16b, KEY13.16b, KEY13.16b
    eor     HASH0.16b, HASH0.16b, HASH0.16b
    eor     HASH1.16b, HASH1.16b, HASH1.16b
    eor     HASH2.16b, HASH2.16b, HASH2.16b
    eor     HASH3.16b, HASH3.16b, HASH3.16b
    eor     HASH4.16b, HASH4.16b, HASH4.16b
AARCH64_AUTIASP
    ret
.size AES_GCM_ClearAsm,.-AES_GCM_ClearAsm

/*
 * AES_GCM_EncryptBlockAsm
 *
 * Bulk AES-GCM encryption in 64-byte chunks (four 16-byte blocks per chunk).
 *
 * Inputs visible in this file:
 *   IVEC0  - base of the state: 16-byte counter block at offset 0, hash state at offset 16
 *   KEY00  - AES key schedule; the round count is read from offset 240
 *   INLEN  - number of input bytes
 *   Input/output data pointers are consumed inside the included macros.
 * Output:
 *   x0     - INLEN rounded down to a multiple of 64 (the number of bytes handled);
 *            0 when INLEN < 64, in which case nothing is read or written.
 *   The updated counter block is stored back to [IVEC0] and the updated hash to [IVEC0 + 16].
 */
.globl  AES_GCM_EncryptBlockAsm
.type   AES_GCM_EncryptBlockAsm,%function
.align  4
AES_GCM_EncryptBlockAsm:
AARCH64_PACIASP
    // The first 64-byte chunk is processed before COUNT is ever tested, so an input
    // shorter than one chunk must leave here. .LEnc_ret sits after OUT_STP, hence this
    // check has to come before IN_STP.
    cmp     INLEN, #64
    b.lo    .LEnc_ret
    IN_STP                                      // save registers (restored by OUT_STP)
    ldr     ROUNDS, [KEY00, #240]               // AES round count: 10, 12 or 14
    add     HTABLE, IVEC0, #16                  // GHASH data starts 16 bytes into the state
    lsr     COUNT, INLEN, #6                    // COUNT = INLEN / 64 = number of chunks
    cmp     ROUNDS, #10                         // flags must survive LOAD_KEY for the b.eq below
    LOAD_KEY                                    // load the leading round keys, advancing KEY00
    b.eq    .LEnc_128_process                   // 10 rounds: AES-128
    cmp     ROUNDS, #12
    ld1     {KEY10.4s, KEY11.4s}, [KEY00], #32  // round keys 10 and 11
    b.eq    .LEnc_192_process                   // 12 rounds: AES-192
    ld1     {KEY12.4s, KEY13.4s}, [KEY00], #32  // round keys 12 and 13
    b       .LEnc_256_process                   // otherwise: AES-256

.LEnc_128_process:
    ldp     KEND0, KEND1, [KEY00]               // final round key (key 10) in general registers
    ldp     IV_H, IV_L, [IVEC0]                 // load the 16-byte counter block
#ifdef HITLS_BIG_ENDIAN
    ror     KEND0, KEND0, #32
    ror     KEND1, KEND1, #32
    REV_2S  IV_H, IV_L
#endif
    lsr     IV_C, IV_L, #32                     // upper 32 bits of IV_L
    ld1     {CTR0.16b}, [IVEC0]                 // CTR block 0
    lsl     IVCTR, COUNTW, #2                   // IVCTR = COUNT * 4 (16-byte blocks to process)
    LOAD_GHASH_TABLE                            // load the GHASH table
    BEFORE_ROUND
    FIRST_ROUND                                 // leading AES rounds (before round 7)
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b        // round 7
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b        // round 8
    rev     w9, IV_W                            // byte-swap the counter word for the next CTR0
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b     // round 9
    orr     x9, x11, x9, lsl #32                // x9 = x11 | (swapped counter << 32)
    add     IV_W, IV_W, #1                      // CTR0++
    eor     v17.16b, v17.16b, v9.16b            // h4k | h3k
    eor     v16.16b, v16.16b, v8.16b            // h2k | h1k
    STORE_RESULT                                // finish the first 64 bytes; expected to set the flags
    b.le    .LEnc_end                           // no chunk left after the first one
    b       .LEnc_128_loop                      // enter the steady-state loop

.LEnc_192_process:
    ldp     KEND0, KEND1, [KEY00]               // final round key (key 12) in general registers
    ldp     IV_H, IV_L, [IVEC0]                 // load the 16-byte counter block
#ifdef HITLS_BIG_ENDIAN
    ror     KEND0, KEND0, #32
    ror     KEND1, KEND1, #32
    REV_2S  IV_H, IV_L
#endif
    lsr     IV_C, IV_L, #32                     // upper 32 bits of IV_L
    ld1     {CTR0.16b}, [IVEC0]                 // CTR block 0
    lsl     IVCTR, COUNTW, #2                   // IVCTR = COUNT * 4 (16-byte blocks to process)
    LOAD_GHASH_TABLE                            // load the GHASH table
    BEFORE_ROUND
    FIRST_ROUND                                 // leading AES rounds (before round 7)
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b        // round 7
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b        // round 8
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b        // round 9
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b       // round 10
    rev     w9, IV_W                            // byte-swap the counter word for the next CTR0
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b    // round 11
    orr     x9, x11, x9, lsl #32                // x9 = x11 | (swapped counter << 32)
    add     IV_W, IV_W, #1                      // CTR0++
    eor     v17.16b, v17.16b, v9.16b            // h4k | h3k
    eor     v16.16b, v16.16b, v8.16b            // h2k | h1k
    STORE_RESULT                                // finish the first 64 bytes; expected to set the flags
    b.le    .LEnc_end                           // no chunk left after the first one
    b       .LEnc_192_loop

.LEnc_256_process:
    ldp     KEND0, KEND1, [KEY00]               // final round key (key 14) in general registers
    ldp     IV_H, IV_L, [IVEC0]                 // load the 16-byte counter block
#ifdef HITLS_BIG_ENDIAN
    ror     KEND0, KEND0, #32
    ror     KEND1, KEND1, #32
    REV_2S  IV_H, IV_L
#endif
    lsr     IV_C, IV_L, #32                     // upper 32 bits of IV_L
    ld1     {CTR0.16b}, [IVEC0]                 // CTR block 0
    lsl     IVCTR, COUNTW, #2                   // IVCTR = COUNT * 4 (16-byte blocks to process)
    LOAD_GHASH_TABLE                            // load the GHASH table
    BEFORE_ROUND
    FIRST_ROUND                                 // leading AES rounds (before round 7)
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b        // round 7
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b        // round 8
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b        // round 9
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b       // round 10
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b       // round 11
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY12.16b       // round 12
    rev     w9, IV_W                            // byte-swap the counter word for the next CTR0
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY13.16b    // round 13
    orr     x9, x11, x9, lsl #32                // x9 = x11 | (swapped counter << 32)
    add     IV_W, IV_W, #1                      // CTR0++
    eor     v17.16b, v17.16b, v9.16b            // h4k | h3k
    eor     v16.16b, v16.16b, v8.16b            // h2k | h1k
    STORE_RESULT                                // finish the first 64 bytes; expected to set the flags
    b.le    .LEnc_end                           // no chunk left after the first one
    b       .LEnc_256_loop

    // Steady-state loops: each macro handles one 64-byte chunk and is expected to leave
    // the flags reflecting the remaining chunk count (<= 0 means done).
.LEnc_128_loop:
    GCM_ENC128_LOOP                             // process 64 bytes
    b.le    .LEnc_end                           // nothing left: leave the loop
    b       .LEnc_128_loop

.LEnc_192_loop:
    GCM_ENC192_LOOP                             // process 64 bytes
    b.le    .LEnc_end                           // nothing left: leave the loop
    b       .LEnc_192_loop

.LEnc_256_loop:
    GCM_ENC256_LOOP                             // process 64 bytes
    b.le    .LEnc_end                           // nothing left: leave the loop
    b       .LEnc_256_loop

.LEnc_end:
    // Byte-reverse each 64-bit lane of the four pending output blocks, then fold them
    // into the hash with GHASH_BLOCK.
    rev64   OUT0.16b, OUT0.16b
    rev64   OUT1.16b, OUT1.16b
    rev64   OUT2.16b, OUT2.16b
    rev64   OUT3.16b, OUT3.16b
    GHASH_BLOCK                                 // GHASH of the last chunk
    rev     w9, IVCTR                           // byte-swap the final counter word
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // swap the two 64-bit halves of the hash
    add     x6, IVEC0, #16                      // x6 = address of the stored hash state
    orr     x9, x11, x9, lsl #32                // rebuild the upper 64 bits of the counter block
    fmov    d0, x10                             // lower 64 bits of the counter block
    fmov    CTR0.d[1], x9                       // upper 64 bits of the counter block
    st1     {CTR0.16b }, [IVEC0]                // write the counter block back
    rev64   HASH0.16b, HASH0.16b                // byte-reverse each 64-bit lane of the hash
    st1     {HASH0.16b }, [x6]                  // write the hash back
    OUT_STP                                     // restore the registers saved by IN_STP
.LEnc_ret:
    and     x0, INLEN, #-64                     // return value: bytes processed (multiple of 64)
AARCH64_AUTIASP
    ret
.size AES_GCM_EncryptBlockAsm,.-AES_GCM_EncryptBlockAsm

/*
 * AES_GCM_DecryptBlockAsm
 *
 * Bulk AES-GCM decryption in 64-byte chunks (four 16-byte blocks per chunk).
 *
 * Inputs visible in this file:
 *   x0     - base of the state (copied into IVEC0): counter block at offset 0,
 *            hash state at offset 16
 *   KEY00  - AES key schedule; the round count is read from offset 240
 *   INLEN  - number of input bytes
 *   Input/output data pointers are consumed inside the included macros.
 * Output:
 *   x0     - INLEN rounded down to a multiple of 64 (the number of bytes handled);
 *            0 when INLEN < 64, in which case nothing is read or written.
 *   The updated counter block is stored back to [IVEC0] and the updated hash to [IVEC0 + 16].
 */
.globl  AES_GCM_DecryptBlockAsm
.type   AES_GCM_DecryptBlockAsm,%function
.align  4
AES_GCM_DecryptBlockAsm:
AARCH64_PACIASP
    // The first 64-byte chunk is processed before COUNT is ever tested, so an input
    // shorter than one chunk must leave here. .LDec_ret sits after OUT_STP, hence this
    // check has to come before IN_STP.
    cmp     INLEN, #64
    b.lo    .LDec_ret
    IN_STP                                      // save registers (restored by OUT_STP)
    ldr     ROUNDS, [KEY00, #240]               // AES round count: 10, 12 or 14
    // NOTE(review): the encrypt entry point has no equivalent of this move - confirm
    // whether IVEC0 is simply an alias of x0.
    mov     IVEC0, x0                           // state base pointer
    add     HTABLE, IVEC0, #16                  // GHASH data starts 16 bytes into the state
    lsr     COUNT, INLEN, #6                    // COUNT = INLEN / 64 = number of chunks
    cmp     ROUNDS, #10                         // flags must survive LOAD_KEY for the b.eq below
    LOAD_KEY                                    // load the leading round keys, advancing KEY00
    b.eq    .LDec_128_process                   // 10 rounds: AES-128
    cmp     ROUNDS, #12
    ld1     {KEY10.4s, KEY11.4s}, [KEY00], #32  // round keys 10 and 11
    b.eq    .LDec_192_process                   // 12 rounds: AES-192
    ld1     {KEY12.4s, KEY13.4s}, [KEY00], #32  // round keys 12 and 13
    b       .LDec_256_process                   // otherwise: AES-256

.LDec_128_process:
    ldp     KEND0, KEND1, [KEY00]               // final round key (key 10) in general registers
    ldp     IV_H, IV_L, [IVEC0]                 // load the 16-byte counter block
#ifdef HITLS_BIG_ENDIAN
    ror     KEND0, KEND0, #32
    ror     KEND1, KEND1, #32
    REV_2S  IV_H, IV_L
#endif
    lsr     IV_C, IV_L, #32                     // upper 32 bits of IV_L
    ld1     {CTR0.16b}, [IVEC0]                 // CTR block 0
#ifdef HITLS_BIG_ENDIAN
    // NOTE(review): the encrypt path does not apply this extra REV_2S to KEND0/KEND1 -
    // confirm which of the two is intended on big-endian builds.
    REV_2S  KEND0, KEND1
#endif
    lsl     IVCTR, COUNTW, #2                   // IVCTR = COUNT * 4 (16-byte blocks to process)
    LOAD_GHASH_TABLE                            // load the GHASH table
    BEFORE_ROUND
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b        // round 0
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b        // round 1
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b        // round 2
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b        // round 3
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b        // round 4
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b        // round 5
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b        // round 6
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b        // round 7
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b        // round 8
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b     // round 9
    eor     v17.16b, v17.16b, v9.16b            // h4k | h3k
    eor     v16.16b, v16.16b, v8.16b            // h2k | h1k
    STORE_DEC_RESULT                            // finish the first 64 bytes; expected to set the flags
    b.le    .LDec_end                           // no chunk left after the first one
    b       .LDec_128_loop

.LDec_192_process:
    ldp     KEND0, KEND1, [KEY00]               // final round key (key 12) in general registers
    ldp     IV_H, IV_L, [IVEC0]                 // load the 16-byte counter block
#ifdef HITLS_BIG_ENDIAN
    ror     KEND0, KEND0, #32
    ror     KEND1, KEND1, #32
    REV_2S  IV_H, IV_L
#endif
    lsr     IV_C, IV_L, #32                     // upper 32 bits of IV_L
    ld1     {CTR0.16b}, [IVEC0]                 // CTR block 0
#ifdef HITLS_BIG_ENDIAN
    // NOTE(review): the encrypt path does not apply this extra REV_2S to KEND0/KEND1 -
    // confirm which of the two is intended on big-endian builds.
    REV_2S  KEND0, KEND1
#endif
    lsl     IVCTR, COUNTW, #2                   // IVCTR = COUNT * 4 (16-byte blocks to process)
    LOAD_GHASH_TABLE                            // load the GHASH table
    BEFORE_ROUND
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b        // round 0
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b        // round 1
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b        // round 2
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b        // round 3
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b        // round 4
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b        // round 5
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b        // round 6
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b        // round 7
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b        // round 8
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b        // round 9
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b       // round 10
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b    // round 11
    eor     v17.16b, v17.16b, v9.16b            // h4k | h3k
    eor     v16.16b, v16.16b, v8.16b            // h2k | h1k
    STORE_DEC_RESULT                            // finish the first 64 bytes; expected to set the flags
    b.le    .LDec_end                           // no chunk left after the first one
    b       .LDec_192_loop

.LDec_256_process:
    ldp     KEND0, KEND1, [KEY00]               // final round key (key 14) in general registers
    ldp     IV_H, IV_L, [IVEC0]                 // load the 16-byte counter block
#ifdef HITLS_BIG_ENDIAN
    ror     KEND0, KEND0, #32
    ror     KEND1, KEND1, #32
    REV_2S  IV_H, IV_L
#endif
    lsr     IV_C, IV_L, #32                     // upper 32 bits of IV_L
    ld1     {CTR0.16b}, [IVEC0]                 // CTR block 0
#ifdef HITLS_BIG_ENDIAN
    // NOTE(review): the encrypt path does not apply this extra REV_2S to KEND0/KEND1 -
    // confirm which of the two is intended on big-endian builds.
    REV_2S  KEND0, KEND1
#endif
    lsl     IVCTR, COUNTW, #2                   // IVCTR = COUNT * 4 (16-byte blocks to process)
    LOAD_GHASH_TABLE                            // load the GHASH table
    BEFORE_ROUND

    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY0.16b        // round 0
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY1.16b        // round 1
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY2.16b        // round 2
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY3.16b        // round 3
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY4.16b        // round 4
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY5.16b        // round 5
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY6.16b        // round 6
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY7.16b        // round 7
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY8.16b        // round 8
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY9.16b        // round 9
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY10.16b       // round 10
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY11.16b       // round 11
    ROUND4  CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY12.16b       // round 12
    ROUND4_END CTR2.16b, CTR1.16b, CTR3.16b, CTR0.16b, KEY13.16b    // round 13
    eor     v17.16b, v17.16b, v9.16b            // h4k | h3k
    eor     v16.16b, v16.16b, v8.16b            // h2k | h1k
    STORE_DEC_RESULT                            // finish the first 64 bytes; expected to set the flags
    b.le    .LDec_end                           // no chunk left after the first one
    b       .LDec_256_loop

    // Steady-state loops: each macro handles one 64-byte chunk and is expected to leave
    // the flags reflecting the remaining chunk count (<= 0 means done).
.LDec_128_loop:
    GCM_DEC128_LOOP                             // process 64 bytes
    b.le    .LDec_end                           // nothing left: leave the loop
    b       .LDec_128_loop

.LDec_192_loop:
    GCM_DEC192_LOOP                             // process 64 bytes
    b.le    .LDec_end                           // nothing left: leave the loop
    b       .LDec_192_loop

.LDec_256_loop:
    GCM_DEC256_LOOP                             // process 64 bytes
    b.le    .LDec_end                           // nothing left: leave the loop
    b       .LDec_256_loop

.LDec_end:
    GHASH_DEC_BLOCK                             // GHASH of the last chunk
    rev     w9, IVCTR                           // byte-swap the final counter word
    ext     HASH0.16b, HASH0.16b, HASH0.16b, #8 // swap the two 64-bit halves of the hash
    add     x6, IVEC0, #16                      // x6 = address of the stored hash state
    orr     x9, x11, x9, lsl #32                // rebuild the upper 64 bits of the counter block
    fmov    d0, x10                             // lower 64 bits of the counter block
    rev64   HASH0.16b, HASH0.16b                // byte-reverse each 64-bit lane of the hash
    fmov    CTR0.d[1], x9                       // upper 64 bits of the counter block
    st1     {CTR0.16b }, [IVEC0]                // write the counter block back
    st1     {HASH0.16b }, [x6]                  // write the hash back
    OUT_STP                                     // restore the registers saved by IN_STP
.LDec_ret:
    and     x0, INLEN, #-64                     // return value: bytes processed (multiple of 64)
AARCH64_AUTIASP
    ret
.size AES_GCM_DecryptBlockAsm,.-AES_GCM_DecryptBlockAsm
#endif