/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_GCM)

.text

.balign 16
g_byteSwapMask:
.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
.size g_byteSwapMask, .-g_byteSwapMask
.balign 16
g_poly:                                     // reduction constant for bit-reflected GHASH
.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
.size g_poly, .-g_poly

/*
 * void AES_GCM_Encrypt16BlockAsm(MODES_GCM_Ctx *ctx, const uint8_t *in,
 *                                uint8_t *out, uint32_t len, void *key);
 * ctx %rdi
 * in  %rsi
 * out %rdx
 * len %rcx
 * key %r8
 */
.globl AES_GCM_Encrypt16BlockAsm
.type AES_GCM_Encrypt16BlockAsm, @function
.balign 32
AES_GCM_Encrypt16BlockAsm:
.cfi_startproc
    leaq g_byteSwapMask(%rip), %r11
    shrl $4, %ecx                           // len / 16 = number of blocks = loop count
    mov 240(%r8), %r9d                      // AES round count (10/12/14)
.Lenc_loop:
    mov 12(%rdi), %eax                      // low 32 bits of the big-endian counter
    addl $0x1000000, %eax                   // increment the counter's last byte; CF set on wrap
    vmovdqu (%rdi), %xmm0                   // counter block (pre-increment) = CTR input
    jc .Lenc_ctr_carry
    jmp .Lenc_aes_cipher
.Lenc_ctr_carry:
    bswap %eax                              // to native order to propagate the carry
    addl $0x100, %eax                       // add the carry into the next byte
    bswap %eax                              // back to big-endian layout
    jmp .Lenc_aes_cipher
.Lenc_aes_cipher:
    mov %eax, 12(%rdi)                      // store the incremented counter for the next block
    vmovdqu (%r8), %xmm1                    // key0
    vpxor %xmm1, %xmm0, %xmm0

    vmovdqu 0x10(%r8), %xmm2                // key1
    lea 0xa0(%r8), %r10                     // point to the last round key of AES-128
    vmovdqu 0x20(%r8), %xmm3                // key2
    vaesenc %xmm2, %xmm0, %xmm0

    vmovdqu 0x30(%r8), %xmm4                // key3
    vaesenc %xmm3, %xmm0, %xmm0

    vmovdqu 0x40(%r8), %xmm5                // key4
    vaesenc %xmm4, %xmm0, %xmm0

    vmovdqu 0x50(%r8), %xmm6                // key5
    vaesenc %xmm5, %xmm0, %xmm0

    vmovdqu 0x60(%r8), %xmm7                // key6
    vaesenc %xmm6, %xmm0, %xmm0

    vmovdqu 0x70(%r8), %xmm8                // key7
    vaesenc %xmm7, %xmm0, %xmm0

    vmovdqu 0x80(%r8), %xmm9                // key8
    vaesenc %xmm8, %xmm0, %xmm0

    vmovdqu 0x90(%r8), %xmm10               // key9
    vaesenc %xmm9, %xmm0, %xmm0

    vaesenc %xmm10, %xmm0, %xmm0
    cmp $12, %r9d                           // compare the round count to decide how many
                                            // extra rounds AES-192/AES-256 need

    jb .Lenc_aes_end                        // AES-128: middle rounds are done

    vmovdqu (%r10), %xmm1                   // key10
    vaesenc %xmm1, %xmm0, %xmm0

    vmovdqu 0x10(%r10), %xmm2               // key11
    vaesenc %xmm2, %xmm0, %xmm0
    lea 0x20(%r10), %r10

    je .Lenc_aes_end                        // AES-192: middle rounds are done

    vmovdqu (%r10), %xmm1                   // key12
    vaesenc %xmm1, %xmm0, %xmm0

    vmovdqu 0x10(%r10), %xmm2               // key13
    vaesenc %xmm2, %xmm0, %xmm0
    lea 0x20(%r10), %r10
    jmp .Lenc_aes_end
.Lenc_aes_end:
    vmovdqu (%r10), %xmm1                   // last round key
    vpxor (%rsi), %xmm1, %xmm1              // XOR the plaintext into the last round key in
                                            // advance (merges AddRoundKey with the CTR XOR)
    vaesenclast %xmm1, %xmm0, %xmm0
    vmovdqu %xmm0, (%rdx)                   // write the ciphertext block

    vmovdqu 16(%rdi), %xmm1                 // running GHASH state
    vmovdqa (%r11), %xmm15                  // g_byteSwapMask
    vpxor %xmm1, %xmm0, %xmm0               // GHASH input: state ^ ciphertext
    vmovdqu 32(%rdi), %xmm1                 // Hash key H^1
    vpshufb %xmm15, %xmm0, %xmm0            // byte-reflect for the polynomial multiply
    vmovdqu 32+32(%rdi), %xmm2              // Hash key H^1_2

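    /*
     * One GF(2^128) multiply-accumulate follows: GHASH = (GHASH ^ C_i) * H,
     * done with three carry-less multiplies via Karatsuba (plus two more in
     * the reduction). A sketch of the algebra, writing the reflected input
     * as X = x1*t^64 ^ x0 and the hash key as H = h1*t^64 ^ h0 over GF(2)[t]:
     *
     *   X * H = (x1*h1) * t^128
     *         ^ ((x1 ^ x0)*(h1 ^ h0) ^ x1*h1 ^ x0*h0) * t^64
     *         ^ (x0*h0)
     *
     * The value at 32+32(%rdi) is assumed to be the precomputed (h1 ^ h0)
     * used for the middle product; the two g_poly phases below then fold the
     * 256-bit result back to 128 bits modulo the GCM polynomial
     * t^128 + t^7 + t^2 + t + 1 (in bit-reflected form, hence 0xc2).
     */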
    vpalignr $8, %xmm0, %xmm0, %xmm3        // xmm3 = input with 64-bit halves swapped

    vpclmulqdq $0x11, %xmm1, %xmm0, %xmm5   // Karatsuba multiply: xmm5 = x1*h1
    vpxor %xmm0, %xmm3, %xmm3               // xmm3 = (x1 ^ x0) in both halves
    vpclmulqdq $0x00, %xmm1, %xmm0, %xmm0   // xmm0 = x0*h0
    vpxor %xmm0, %xmm5, %xmm1               // xmm1 = x1*h1 ^ x0*h0
    vpclmulqdq $0x00, %xmm2, %xmm3, %xmm3   // xmm3 = (x1 ^ x0)*(h1 ^ h0)
    vpxor %xmm1, %xmm3, %xmm3               // xmm3 = middle 128 bits of the product

    vpslldq $8, %xmm3, %xmm4
    vpsrldq $8, %xmm3, %xmm3
    vpxor %xmm4, %xmm0, %xmm0               // xmm0 = low 128 bits of the 256-bit product
    vpxor %xmm3, %xmm5, %xmm5               // xmm5 = high 128 bits

    vmovdqa 0x10(%r11), %xmm14              // g_poly
    vpalignr $8, %xmm0, %xmm0, %xmm2        // 1st phase of reduction
    vpclmulqdq $0x10, %xmm14, %xmm0, %xmm0
    vpxor %xmm2, %xmm0, %xmm0

    vpalignr $8, %xmm0, %xmm0, %xmm2        // 2nd phase of reduction
    vpclmulqdq $0x10, %xmm14, %xmm0, %xmm0
    vpxor %xmm5, %xmm2, %xmm2
    vpxor %xmm2, %xmm0, %xmm0

    vpshufb %xmm15, %xmm0, %xmm0            // back to byte order
    lea 0x10(%rsi), %rsi
    vmovdqu %xmm0, 16(%rdi)                 // store the updated GHASH state
    lea 0x10(%rdx), %rdx
    dec %ecx
    jnz .Lenc_loop
    ret
.cfi_endproc
.size AES_GCM_Encrypt16BlockAsm, .-AES_GCM_Encrypt16BlockAsm

/*
 * void AES_GCM_Decrypt16BlockAsm(MODES_GCM_Ctx *ctx, const uint8_t *in,
 *                                uint8_t *out, uint32_t len, void *key);
 * ctx %rdi
 * in  %rsi
 * out %rdx
 * len %rcx
 * key %r8
 */
.globl AES_GCM_Decrypt16BlockAsm
.type AES_GCM_Decrypt16BlockAsm, @function
.balign 32
AES_GCM_Decrypt16BlockAsm:
.cfi_startproc
    leaq g_byteSwapMask(%rip), %r11
    vmovdqu 16(%rdi), %xmm10                // running GHASH state
    shrl $4, %ecx                           // len / 16 = number of blocks = loop count
    vmovdqa (%r11), %xmm15                  // g_byteSwapMask
.Ldec_loop:
    mov 12(%rdi), %eax                      // low 32 bits of the big-endian counter
    addl $0x1000000, %eax                   // increment the counter's last byte; CF set on wrap
    mov 240(%r8), %r9d                      // AES round count (10/12/14)
    vmovdqu (%rdi), %xmm0                   // counter block (pre-increment) = CTR input
    jc .Ldec_ctr_carry
    jmp .Ldec_aes_cipher
.Ldec_ctr_carry:
    bswap %eax                              // to native order to propagate the carry
    addl $0x100, %eax                       // add the carry into the next byte
    bswap %eax                              // back to big-endian layout
    jmp .Ldec_aes_cipher
.balign 32
.Ldec_aes_cipher:
    mov %eax, 12(%rdi)                      // store the incremented counter for the next block
    cmp $12, %r9d                           // set flags now; consumed by jb/je much later
                                            // (nothing in between touches EFLAGS)
    vmovdqu (%r8), %xmm1                    // key 0
    vpxor (%rsi), %xmm10, %xmm10            // GHASH input: state ^ ciphertext
    lea 0xa0(%r8), %r10                     // point to the last round key of AES-128
    vpxor %xmm1, %xmm0, %xmm0
    vmovdqu 0x10(%r8), %xmm1                // key 1
    vmovdqu 32(%rdi), %xmm11                // Hash key H^1
    vmovdqu 32+32(%rdi), %xmm12             // Hash key H^1_2

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x20(%r8), %xmm1                // key 2
    vpshufb %xmm15, %xmm10, %xmm10          // byte-reflect for the polynomial multiply
    vpshufd $0x4e, %xmm10, %xmm13           // xmm13 = input with 64-bit halves swapped

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x30(%r8), %xmm1                // key 3
    vpclmulqdq $0x11, %xmm11, %xmm10, %xmm14 // Karatsuba multiply: xmm14 = x1*h1
    vpxor %xmm10, %xmm13, %xmm13            // xmm13 = (x1 ^ x0) in both halves

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x40(%r8), %xmm1                // key 4
    vpclmulqdq $0x00, %xmm11, %xmm10, %xmm10 // xmm10 = x0*h0
    vpxor %xmm10, %xmm14, %xmm11            // xmm11 = x1*h1 ^ x0*h0

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x50(%r8), %xmm1                // key 5
    vpclmulqdq $0x00, %xmm12, %xmm13, %xmm13 // xmm13 = (x1 ^ x0)*(h1 ^ h0)
    vpxor %xmm11, %xmm13, %xmm13            // xmm13 = middle 128 bits of the product

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x60(%r8), %xmm1                // key 6
    vpslldq $8, %xmm13, %xmm11
    vpsrldq $8, %xmm13, %xmm13
    vpxor %xmm11, %xmm10, %xmm10            // xmm10 = low 128 bits of the 256-bit product
    vpxor %xmm13, %xmm14, %xmm14            // xmm14 = high 128 bits

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x70(%r8), %xmm1                // key 7
    vmovdqa 0x10(%r11), %xmm13              // g_poly
    vpalignr $8, %xmm10, %xmm10, %xmm12     // 1st phase of reduction

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x80(%r8), %xmm1                // key 8
    vpclmulqdq $0x10, %xmm13, %xmm10, %xmm10
    vpxor %xmm12, %xmm10, %xmm10

    vaesenc %xmm1, %xmm0, %xmm0
    vmovdqu 0x90(%r8), %xmm1                // key 9
    vpalignr $8, %xmm10, %xmm10, %xmm12     // 2nd phase of reduction
    vpclmulqdq $0x10, %xmm13, %xmm10, %xmm10

    vaesenc %xmm1, %xmm0, %xmm0
    vpxor %xmm14, %xmm12, %xmm12
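
    /*
     * Key-length dispatch. The cmp $12, %r9d at .Ldec_aes_cipher still
     * governs here: none of the vector instructions or lea in between
     * modifies EFLAGS, so its flags survive the nine interleaved rounds.
     * A minimal C model of the dispatch (AesRound/AesLastRound are
     * illustrative names, not project APIs):
     *
     *   if (rounds >= 12) {            // AES-192 and AES-256
     *       block = AesRound(block, key[10]);
     *       block = AesRound(block, key[11]);
     *       if (rounds > 12) {         // AES-256 only
     *           block = AesRound(block, key[12]);
     *           block = AesRound(block, key[13]);
     *       }
     *   }
     *   block = AesLastRound(block, key[rounds]);  // merged with the data XOR
     */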
    jb .Ldec_ending                         // AES-128: middle rounds are done

    vmovdqu (%r10), %xmm1                   // key 10
    vmovdqu 0x10(%r10), %xmm2               // key 11
    lea 0x20(%r10), %r10
    vaesenc %xmm1, %xmm0, %xmm0
    vaesenc %xmm2, %xmm0, %xmm0

    je .Ldec_ending                         // AES-192: middle rounds are done

    vmovdqu (%r10), %xmm1                   // key 12
    vmovdqu 0x10(%r10), %xmm2               // key 13
    lea 0x20(%r10), %r10
    vaesenc %xmm1, %xmm0, %xmm0
    vaesenc %xmm2, %xmm0, %xmm0

    jmp .Ldec_ending

.Ldec_ending:
    vmovdqu (%r10), %xmm1                   // last round key
    vpxor %xmm12, %xmm10, %xmm10            // finish the reduction
    vpxor (%rsi), %xmm1, %xmm1              // XOR the ciphertext into the last round key in
                                            // advance (merges AddRoundKey with the CTR XOR)
    vaesenclast %xmm1, %xmm0, %xmm0
    vpshufb %xmm15, %xmm10, %xmm10          // GHASH state back to byte order
    vmovdqu %xmm0, (%rdx)                   // write the plaintext block
    lea 0x10(%rsi), %rsi
    lea 0x10(%rdx), %rdx
    dec %ecx
    jnz .Ldec_loop
    vmovdqu %xmm10, 16(%rdi)                // store the updated GHASH state
    ret
.cfi_endproc
.size AES_GCM_Decrypt16BlockAsm, .-AES_GCM_Decrypt16BlockAsm

/*
 * void AES_GCM_ClearAsm(void);
 * Zeroize the vector registers used above so round-key and GHASH material
 * does not linger after the bulk loop returns.
 */
.globl AES_GCM_ClearAsm
.type AES_GCM_ClearAsm, @function
.balign 32
AES_GCM_ClearAsm:
.cfi_startproc
    vpxor %xmm1, %xmm1, %xmm1
    vpxor %xmm2, %xmm2, %xmm2
    vpxor %xmm3, %xmm3, %xmm3
    vpxor %xmm4, %xmm4, %xmm4
    vpxor %xmm5, %xmm5, %xmm5
    vpxor %xmm6, %xmm6, %xmm6
    vpxor %xmm7, %xmm7, %xmm7
    vpxor %xmm8, %xmm8, %xmm8
    vpxor %xmm9, %xmm9, %xmm9
    vpxor %xmm10, %xmm10, %xmm10
    vpxor %xmm11, %xmm11, %xmm11
    vpxor %xmm12, %xmm12, %xmm12
    ret
.cfi_endproc
.size AES_GCM_ClearAsm, .-AES_GCM_ClearAsm
#endif
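
/*
 * Caller-side sketch (illustrative; the authoritative contract lives in this
 * module's C sources). The MODES_GCM_Ctx layout is an assumption inferred
 * from the offsets used above: bytes 0-15 hold the big-endian counter block,
 * bytes 16-31 the running GHASH state, and bytes 32 onward the hash-key
 * material. GcmCryptFullBlocks is a hypothetical wrapper name:
 *
 *   static void GcmCryptFullBlocks(MODES_GCM_Ctx *ctx, const uint8_t *in,
 *                                  uint8_t *out, uint32_t len, void *key,
 *                                  bool enc)
 *   {
 *       uint32_t full = len & ~15u;    // the asm handles whole blocks only
 *       if (full != 0) {
 *           if (enc) {
 *               AES_GCM_Encrypt16BlockAsm(ctx, in, out, full, key);
 *           } else {
 *               AES_GCM_Decrypt16BlockAsm(ctx, in, out, full, key);
 *           }
 *           AES_GCM_ClearAsm();        // scrub xmm1-xmm12 on the way out
 *       }
 *       // any 1-15 byte tail goes through the generic (non-asm) path
 *   }
 */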