/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CTR)

#include "crypt_arm.h"
#include "crypt_aes_macro_armv8.s"

.file    "crypt_aes_ctr_armv8.S"
.text
.arch    armv8-a+crypto

.align   5

/* Argument registers, in the order of the C prototype below. */
KEY     .req    x0
IN      .req    x1
OUT     .req    x2
LEN     .req    x3
IV      .req    x4

/* LTMP: bytes still to process.  CTMP: counter block of the first block of the current batch. */
LTMP    .req    x12
CTMP    .req    v27

/* Input/output data blocks. */
BLK0    .req    v0
BLK1    .req    v1
BLK2    .req    v2
BLK3    .req    v3
BLK4    .req    v4
BLK5    .req    v5
BLK6    .req    v6
BLK7    .req    v7

/* Counter blocks; they are handed to the AES_ENC_* macros and then XORed with the data. */
CTR0    .req    v19
CTR1    .req    v20
CTR2    .req    v21
CTR3    .req    v22
CTR4    .req    v23
CTR5    .req    v24
CTR6    .req    v25
CTR7    .req    v26

/* Scratch registers and round count handed to the AES_ENC_* macros. */
RDK0    .req    v17
RDK1    .req    v18
ROUNDS  .req    w6

/*
 * ctr + 1
 *
 * Advances the running 32-bit counter kept in w11 by one and writes it into
 * the 32-bit vector lane given by ctr (e.g. CTR1.s[3]).
 *
 * Without HITLS_BIG_ENDIAN, w11 holds the byte-reversed lane value and the
 * re-reversed result is written through w9.  With HITLS_BIG_ENDIAN, w11 holds
 * the raw lane value and is reversed around the addition.
 *
 * Clobbers: w11, and w9 when HITLS_BIG_ENDIAN is not defined.
 * NOTE(review): only this 32-bit word is incremented; a wrap is not carried
 * into the other twelve bytes of the counter block - confirm that callers
 * account for this.
 */
.macro ADDCTR ctr
#ifndef HITLS_BIG_ENDIAN
    add     w11, w11, #1
    rev     w9, w11
    mov     \ctr, w9
#else
    rev     w11, w11
    add     w11, w11, #1
    rev     w11, w11
    mov     \ctr, w11
#endif
.endm

/*
 * Vn      -  V0 ~ V31
 * 8bytes  -  Vn.8B  Vn.4H  Vn.2S  Vn.1D
 * 16bytes -  Vn.16B Vn.8H  Vn.4S  Vn.2D
 */

/*
 * int32_t CRYPT_AES_CTR_Encrypt(const CRYPT_AES_Key *ctx,
 *                               const uint8_t *in,
 *                               uint8_t *out,
 *                               uint32_t len,
 *                               uint8_t *iv);
 *
 * In:    x0 = ctx, x1 = in, x2 = out, w3 = len, x4 = iv (16-byte counter block)
 * Out:   x0 = 0 (always)
 *
 * Each 16-byte input block is XORed with the output of the AES_ENC_* macro
 * applied to the matching counter block.  Up to eight blocks are handled per
 * batch; 128 bytes or more run through the eight-block loop and the rest is
 * dispatched to a one- to seven-block tail.  When len is non-zero, the counter
 * block following the last one used is written back to iv.  When len is zero,
 * iv is left untouched.
 *
 * Clobbers: x0-x2, w6, w9, w11, x12, x14, v0-v7, v17-v27, flags.
 *           Registers touched inside the AES_ENC_* macros (defined in
 *           crypt_aes_macro_armv8.s) are presumably limited to their
 *           arguments - TODO confirm.
 *
 * NOTE(review): the dispatch works in whole 16-byte blocks, and a non-zero len
 * below 16 still loads and stores one full block.  This presumably relies on
 * the caller passing a multiple of 16 - verify against the caller.
 */

.globl CRYPT_AES_CTR_Encrypt
.type CRYPT_AES_CTR_Encrypt, %function
CRYPT_AES_CTR_Encrypt:
AARCH64_PACIASP
    ld1     {CTR0.16b}, [IV]                // Reads the IV.
    mov     CTMP.16b, CTR0.16b              // CTMP tracks the counter block of the batch.
    mov     w11, CTR0.s[3]                  // w11 = running 32-bit counter word (see ADDCTR).
#ifndef HITLS_BIG_ENDIAN
    rev     w11, w11
#endif
    /*
     * len is a uint32_t, so only w3 is defined on entry; the upper half of x3
     * is unspecified by the procedure call standard.  Writing w12 zero-extends
     * the length into LTMP (x12) so the 64-bit compares below are reliable.
     */
    mov     w12, w3

/* Select the handler for the remaining length (unsigned compares on LTMP). */
.Lctr_aesenc_start:
    cmp     LTMP, #64
    b.hs    .Lctr_enc_above_equal_4_blks
    cmp     LTMP, #32
    b.hs    .Lctr_enc_above_equal_2_blks
    cbz     LTMP, .Lctr_len_zero            // Nothing to do: leave the IV untouched.
    b       .Lctr_enc_proc_1_blk

.Lctr_enc_above_equal_2_blks:
    cmp     LTMP, #48
    b.lo    .Lctr_enc_proc_2_blks
    b       .Lctr_enc_proc_3_blks

.Lctr_enc_above_equal_4_blks:
    cmp     LTMP, #96
    b.hs    .Lctr_enc_above_equal_6_blks
    cmp     LTMP, #80
    b.lo    .Lctr_enc_proc_4_blks
    b       .Lctr_enc_proc_5_blks

.Lctr_enc_above_equal_6_blks:
    cmp     LTMP, #112
    b.lo    .Lctr_enc_proc_6_blks
    cmp     LTMP, #128
    b.lo    .Lctr_enc_proc_7_blks

.Lctr_enc_proc_8_blks:

/* When the length is greater than or equal to 128, eight blocks loop is used. */
.Lctr_aesenc_8_blks_loop:

    /* Calculate eight CTRs. */
    mov     CTR1.16b, CTMP.16b
    mov     CTR2.16b, CTMP.16b
    mov     CTR3.16b, CTMP.16b
    mov     CTR4.16b, CTMP.16b
    mov     CTR5.16b, CTMP.16b
    mov     CTR6.16b, CTMP.16b
    mov     CTR7.16b, CTMP.16b

    ADDCTR  CTR1.s[3]
    ADDCTR  CTR2.s[3]
    ADDCTR  CTR3.s[3]
    ADDCTR  CTR4.s[3]
    ADDCTR  CTR5.s[3]
    ADDCTR  CTR6.s[3]
    ADDCTR  CTR7.s[3]

    mov     x14, KEY                        // Prevent the key from being changed.
    AES_ENC_8_BLKS x14 CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b \
                   CTR5.16b CTR6.16b CTR7.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1     {BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [IN], #64

    eor     BLK0.16b, BLK0.16b, CTR0.16b
    eor     BLK1.16b, BLK1.16b, CTR1.16b
    eor     BLK2.16b, BLK2.16b, CTR2.16b
    eor     BLK3.16b, BLK3.16b, CTR3.16b
    eor     BLK4.16b, BLK4.16b, CTR4.16b
    eor     BLK5.16b, BLK5.16b, CTR5.16b
    eor     BLK6.16b, BLK6.16b, CTR6.16b
    eor     BLK7.16b, BLK7.16b, CTR7.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1     {BLK4.16b, BLK5.16b, BLK6.16b, BLK7.16b}, [OUT], #64

    sub     LTMP, LTMP, #128
    cbz     LTMP, .Lctr_aesenc_finish       // All data consumed.

    /* More data follows: the next batch starts at the following counter value. */
    ADDCTR  CTMP.s[3]
    mov     CTR0.16b, CTMP.16b

    cmp     LTMP, #128
    b.lo    .Lctr_aesenc_start              // Fewer than eight blocks left: use a tail handler.
    b       .Lctr_aesenc_8_blks_loop

/*
 * Tail handlers.  Each one processes a fixed number of blocks and then jumps
 * (or falls through) to .Lctr_aesenc_finish.  They pass KEY itself to the AES
 * macros because the key pointer is not needed again afterwards.
 */
.Lctr_enc_proc_1_blk:

    AES_ENC_1_BLK KEY CTR0.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS
    ld1     {BLK0.16b}, [IN]
    eor     BLK0.16b, CTR0.16b, BLK0.16b
    st1     {BLK0.16b}, [OUT]
    b       .Lctr_aesenc_finish

.Lctr_enc_proc_2_blks:

    mov     CTR1.16b, CTMP.16b
    ADDCTR  CTR1.s[3]

    AES_ENC_2_BLKS KEY CTR0.16b CTR1.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1     {BLK0.16b, BLK1.16b}, [IN]

    eor     BLK0.16b, CTR0.16b, BLK0.16b
    eor     BLK1.16b, CTR1.16b, BLK1.16b

    st1     {BLK0.16b, BLK1.16b}, [OUT]
    b       .Lctr_aesenc_finish

.Lctr_enc_proc_3_blks:

    mov     CTR1.16b, CTMP.16b
    mov     CTR2.16b, CTMP.16b

    ADDCTR  CTR1.s[3]
    ADDCTR  CTR2.s[3]

    AES_ENC_3_BLKS KEY CTR0.16b CTR1.16b CTR2.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1     {BLK0.16b, BLK1.16b, BLK2.16b}, [IN]

    eor     BLK0.16b, BLK0.16b, CTR0.16b
    eor     BLK1.16b, BLK1.16b, CTR1.16b
    eor     BLK2.16b, BLK2.16b, CTR2.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b}, [OUT]
    b       .Lctr_aesenc_finish

.Lctr_enc_proc_4_blks:

    mov     CTR1.16b, CTMP.16b
    mov     CTR2.16b, CTMP.16b
    mov     CTR3.16b, CTMP.16b

    ADDCTR  CTR1.s[3]
    ADDCTR  CTR2.s[3]
    ADDCTR  CTR3.s[3]

    AES_ENC_4_BLKS KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN]

    eor     BLK0.16b, BLK0.16b, CTR0.16b
    eor     BLK1.16b, BLK1.16b, CTR1.16b
    eor     BLK2.16b, BLK2.16b, CTR2.16b
    eor     BLK3.16b, BLK3.16b, CTR3.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT]
    b       .Lctr_aesenc_finish

.Lctr_enc_proc_5_blks:

    mov     CTR1.16b, CTMP.16b
    mov     CTR2.16b, CTMP.16b
    mov     CTR3.16b, CTMP.16b
    mov     CTR4.16b, CTMP.16b

    ADDCTR  CTR1.s[3]
    ADDCTR  CTR2.s[3]
    ADDCTR  CTR3.s[3]
    ADDCTR  CTR4.s[3]

    AES_ENC_5_BLKS KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1     {BLK4.16b}, [IN]

    eor     BLK0.16b, BLK0.16b, CTR0.16b
    eor     BLK1.16b, BLK1.16b, CTR1.16b
    eor     BLK2.16b, BLK2.16b, CTR2.16b
    eor     BLK3.16b, BLK3.16b, CTR3.16b
    eor     BLK4.16b, BLK4.16b, CTR4.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1     {BLK4.16b}, [OUT]
    b       .Lctr_aesenc_finish

.Lctr_enc_proc_6_blks:

    mov     CTR1.16b, CTMP.16b
    mov     CTR2.16b, CTMP.16b
    mov     CTR3.16b, CTMP.16b
    mov     CTR4.16b, CTMP.16b
    mov     CTR5.16b, CTMP.16b

    ADDCTR  CTR1.s[3]
    ADDCTR  CTR2.s[3]
    ADDCTR  CTR3.s[3]
    ADDCTR  CTR4.s[3]
    ADDCTR  CTR5.s[3]

    AES_ENC_6_BLKS KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b \
                   CTR5.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1     {BLK4.16b, BLK5.16b}, [IN]

    eor     BLK0.16b, BLK0.16b, CTR0.16b
    eor     BLK1.16b, BLK1.16b, CTR1.16b
    eor     BLK2.16b, BLK2.16b, CTR2.16b
    eor     BLK3.16b, BLK3.16b, CTR3.16b
    eor     BLK4.16b, BLK4.16b, CTR4.16b
    eor     BLK5.16b, BLK5.16b, CTR5.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1     {BLK4.16b, BLK5.16b}, [OUT]
    b       .Lctr_aesenc_finish

.Lctr_enc_proc_7_blks:

    mov     CTR1.16b, CTMP.16b
    mov     CTR2.16b, CTMP.16b
    mov     CTR3.16b, CTMP.16b
    mov     CTR4.16b, CTMP.16b
    mov     CTR5.16b, CTMP.16b
    mov     CTR6.16b, CTMP.16b

    ADDCTR  CTR1.s[3]
    ADDCTR  CTR2.s[3]
    ADDCTR  CTR3.s[3]
    ADDCTR  CTR4.s[3]
    ADDCTR  CTR5.s[3]
    ADDCTR  CTR6.s[3]

    AES_ENC_7_BLKS KEY CTR0.16b CTR1.16b CTR2.16b CTR3.16b CTR4.16b \
                   CTR5.16b CTR6.16b RDK0.4s RDK1.4s RDK0.16b RDK1.16b ROUNDS

    ld1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [IN], #64
    ld1     {BLK4.16b, BLK5.16b, BLK6.16b}, [IN]

    eor     BLK0.16b, BLK0.16b, CTR0.16b
    eor     BLK1.16b, BLK1.16b, CTR1.16b
    eor     BLK2.16b, BLK2.16b, CTR2.16b
    eor     BLK3.16b, BLK3.16b, CTR3.16b
    eor     BLK4.16b, BLK4.16b, CTR4.16b
    eor     BLK5.16b, BLK5.16b, CTR5.16b
    eor     BLK6.16b, BLK6.16b, CTR6.16b

    st1     {BLK0.16b, BLK1.16b, BLK2.16b, BLK3.16b}, [OUT], #64
    st1     {BLK4.16b, BLK5.16b, BLK6.16b}, [OUT]

/* w11 holds the counter of the last block used; step once more and publish it. */
.Lctr_aesenc_finish:
    ADDCTR  CTMP.s[3]                       // Fill CTR0 for the next round.
    st1     {CTMP.16b}, [IV]

/*
 * Common exit, also taken directly when len is zero: return 0 and zero the
 * vector registers that were handed to the AES macros.
 */
.Lctr_len_zero:
    mov     x0, #0
    eor     CTR0.16b, CTR0.16b, CTR0.16b
    eor     CTR1.16b, CTR1.16b, CTR1.16b
    eor     CTR2.16b, CTR2.16b, CTR2.16b
    eor     CTR3.16b, CTR3.16b, CTR3.16b
    eor     CTR4.16b, CTR4.16b, CTR4.16b
    eor     CTR5.16b, CTR5.16b, CTR5.16b
    eor     CTR6.16b, CTR6.16b, CTR6.16b
    eor     CTR7.16b, CTR7.16b, CTR7.16b
    eor     RDK0.16b, RDK0.16b, RDK0.16b
    eor     RDK1.16b, RDK1.16b, RDK1.16b

AARCH64_AUTIASP
    ret
.size CRYPT_AES_CTR_Encrypt, .-CRYPT_AES_CTR_Encrypt

#endif