/*
 * This file is part of the openHiTLS project.
 *
 * openHiTLS is licensed under the Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *
 *     http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PSL v2 for more details.
 */

#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_CCM)

.text

.balign 16
g_byteSwapMask:
.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
.size g_byteSwapMask, .-g_byteSwapMask
.balign 16
g_one:
.byte 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.size g_one, .-g_one

/*
 * void AesCcmEncryptAsm(void *key, uint8_t *nonce, const uint8_t *in, uint8_t *out, uint32_t len)
 * rdi *key
 * rsi *nonce
 * rdx *in
 * rcx *out
 * r8  len
 */
.globl AesCcmEncryptAsm
.type AesCcmEncryptAsm, @function
.balign 16
AesCcmEncryptAsm:
.cfi_startproc
    shr $4, %r8d                        // loop count: number of whole 16-byte blocks
    jz .Lenc_ret
    lea g_byteSwapMask(%rip), %r11
    mov 0xf0(%rdi), %r9d                // key->rounds
    vmovdqa (%r11), %xmm15              // g_byteSwapMask
    sub $1, %r9d
    vmovdqa 0x10(%r11), %xmm14          // g_one
    vmovdqu (%rsi), %xmm0               // nonce (counter block)
    vmovdqu 0x10(%rsi), %xmm8           // tag
    vmovdqu 0x20(%rsi), %xmm9           // last keystream block
// Each iteration runs the CTR keystream rounds and the CBC-MAC rounds in parallel.
.balign 16
.Lenc_outer_loop:
    mov %r9d, %r10d
    vpxor (%rdx), %xmm8, %xmm8          // tag ^= plaintext
    vmovdqu (%rdi), %xmm1               // round key 0
    lea 0x10(%rdi), %r11                // round key 1
    vpxor %xmm0, %xmm1, %xmm2           // first round xor (CTR path)
    vpshufb %xmm15, %xmm0, %xmm0        // reverse byte order of counter => counter'
    vpxor %xmm8, %xmm1, %xmm3           // first round xor (CBC-MAC path)
.balign 16
.Lenc_aes_loop:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm2, %xmm2
    vaesenc %xmm1, %xmm3, %xmm3
    lea 0x10(%r11), %r11                // next round key
    dec %r10d
    jnz .Lenc_aes_loop
    vmovdqu (%r11), %xmm1               // last round key
    vpaddq %xmm14, %xmm0, %xmm0         // counter' + 1
    vaesenclast %xmm1, %xmm2, %xmm9     // keystream block (last)
    vaesenclast %xmm1, %xmm3, %xmm8     // updated tag
    vpxor (%rdx), %xmm9, %xmm2          // out = in ^ keystream
    vpshufb %xmm15, %xmm0, %xmm0        // reverse byte order of counter' => counter
    lea 0x10(%rdx), %rdx                // next input block
    vmovdqu %xmm2, (%rcx)               // store ciphertext block

    lea 0x10(%rcx), %rcx                // next output block
    dec %r8d
    jnz .Lenc_outer_loop
    vpxor %xmm1, %xmm1, %xmm1           // clear registers holding key and state material
    vpxor %xmm2, %xmm2, %xmm2
    vpxor %xmm3, %xmm3, %xmm3
    vmovdqu %xmm0, (%rsi)               // store updated counter
    vpxor %xmm0, %xmm0, %xmm0
    vmovdqu %xmm8, 0x10(%rsi)           // store updated tag
    vpxor %xmm8, %xmm8, %xmm8
    vmovdqu %xmm9, 0x20(%rsi)           // store last keystream block
    vpxor %xmm9, %xmm9, %xmm9
.Lenc_ret:
    ret
.cfi_endproc
.size AesCcmEncryptAsm, .-AesCcmEncryptAsm
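
/*
 * Caller-side sketch (illustration only; apart from the AesCcmEncryptAsm prototype,
 * every name below is hypothetical). The nonce argument points to a 48-byte working
 * state made of three 16-byte blocks: the CTR counter block at offset 0x00, the
 * running CBC-MAC tag at offset 0x10, and the most recent keystream block at offset
 * 0x20; all three are updated in place. len is consumed as len / 16 whole blocks,
 * so a tail shorter than 16 bytes is left for the caller.
 *
 *   #include <stdint.h>
 *
 *   void AesCcmEncryptAsm(void *key, uint8_t *nonce, const uint8_t *in, uint8_t *out, uint32_t len);
 *
 *   // Hypothetical wrapper: process only the whole 16-byte blocks and report how
 *   // many bytes were consumed, so the caller can finish the partial tail itself.
 *   static uint32_t CcmEncryptFullBlocks(void *keySchedule, uint8_t state[48],
 *                                        const uint8_t *in, uint8_t *out, uint32_t len)
 *   {
 *       uint32_t blocks = len & ~(uint32_t)15;   // largest multiple of 16 <= len
 *       if (blocks != 0) {
 *           AesCcmEncryptAsm(keySchedule, state, in, out, blocks);
 *       }
 *       return blocks;
 *   }
 */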

/*
 * void AesCcmDecryptAsm(void *key, uint8_t *nonce, const uint8_t *in, uint8_t *out, uint32_t len)
 * rdi *key
 * rsi *nonce
 * rdx *in
 * rcx *out
 * r8  len
 */
.globl AesCcmDecryptAsm
.type AesCcmDecryptAsm, @function
.balign 16
AesCcmDecryptAsm:
.cfi_startproc
    shr $4, %r8d                        // loop count: number of whole 16-byte blocks
    jz .Ldec_ret
    lea g_byteSwapMask(%rip), %r11
    mov 0xf0(%rdi), %r9d                // key->rounds
    vmovdqa (%r11), %xmm15              // g_byteSwapMask
    sub $1, %r9d
    vmovdqa 0x10(%r11), %xmm14          // g_one
    vmovdqu (%rsi), %xmm0               // nonce (counter block)
    vmovdqu 0x10(%rsi), %xmm8           // tag

// First block: only the CTR keystream can be computed here, because the CBC-MAC
// needs the recovered plaintext of this block.
.balign 16
.Ldec_outer_loop:
    mov %r9d, %r10d
    lea 0x10(%rdi), %r11                // round key 1
    vmovdqu (%rdi), %xmm1               // round key 0
    vpxor %xmm0, %xmm1, %xmm2           // first round xor (CTR path)
.Ldec_aes_loop:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm2, %xmm2
    lea 0x10(%r11), %r11
    dec %r10d
    jnz .Ldec_aes_loop
    vmovdqu (%r11), %xmm1               // last round key
    vaesenclast %xmm1, %xmm2, %xmm4     // keystream block
    vmovdqu %xmm4, 0x20(%rsi)           // store last keystream block
    vpxor (%rdx), %xmm4, %xmm2          // out = in ^ keystream
    vpxor %xmm2, %xmm8, %xmm8           // tag ^= plaintext
    vmovdqu %xmm2, (%rcx)               // store plaintext block
    lea 0x10(%rdx), %rdx
    lea 0x10(%rcx), %rcx
    vpshufb %xmm15, %xmm0, %xmm0        // reverse byte order of counter => counter'
    vpaddq %xmm14, %xmm0, %xmm0         // counter' + 1
    vpshufb %xmm15, %xmm0, %xmm0        // reverse byte order of counter' => counter
    cmp $2, %r8d
    jb .Ldec_parallel_out

// Middle blocks: run the next block's CTR keystream and the previous plaintext's
// CBC-MAC rounds in parallel.
.Ldec_parallel_loop:
    mov %r9d, %r10d
    lea 0x10(%rdi), %r11                // round key 1
    vmovdqu (%rdi), %xmm1               // round key 0
    vpxor %xmm0, %xmm1, %xmm2           // first round xor (CTR path)
    vpxor %xmm8, %xmm1, %xmm3           // first round xor (CBC-MAC path)
.Ldec_parallel_inner_loop:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm2, %xmm2
    lea 0x10(%r11), %r11
    vaesenc %xmm1, %xmm3, %xmm3
    dec %r10d
    jnz .Ldec_parallel_inner_loop
    vmovdqu (%r11), %xmm1               // last round key
    vaesenclast %xmm1, %xmm2, %xmm4     // keystream block
    vaesenclast %xmm1, %xmm3, %xmm8     // updated tag
    vmovdqu %xmm4, 0x20(%rsi)           // store last keystream block
    vpxor (%rdx), %xmm4, %xmm2          // out = in ^ keystream
    vpxor %xmm2, %xmm8, %xmm8           // tag ^= plaintext
    vmovdqu %xmm2, (%rcx)               // store plaintext block
    lea 0x10(%rdx), %rdx
    lea 0x10(%rcx), %rcx
    vpshufb %xmm15, %xmm0, %xmm0        // reverse byte order of counter => counter'
    vpaddq %xmm14, %xmm0, %xmm0         // counter' + 1
    vpshufb %xmm15, %xmm0, %xmm0        // reverse byte order of counter' => counter
    dec %r8d
    cmp $2, %r8d
    jae .Ldec_parallel_loop

// Last block: absorb the final recovered plaintext block into the CBC-MAC.
.Ldec_parallel_out:
    mov %r9d, %r10d
    lea 0x10(%rdi), %r11                // round key 1
    vmovdqu (%rdi), %xmm1               // round key 0
    vpxor %xmm8, %xmm1, %xmm3           // first round xor (CBC-MAC path)
.Ldec_aes_loop_1:
    vmovdqu (%r11), %xmm1
    vaesenc %xmm1, %xmm3, %xmm3
    lea 0x10(%r11), %r11
    dec %r10d
    jnz .Ldec_aes_loop_1
    vmovdqu (%r11), %xmm1               // last round key
    vaesenclast %xmm1, %xmm3, %xmm8     // updated tag
    dec %r8d
    jnz .Ldec_outer_loop

    vmovdqu %xmm0, (%rsi)               // store updated counter
    vpxor %xmm0, %xmm0, %xmm0           // clear registers holding key and state material
    vpxor %xmm1, %xmm1, %xmm1
    vpxor %xmm2, %xmm2, %xmm2
    vmovdqu %xmm8, 0x10(%rsi)           // store updated tag
    vpxor %xmm8, %xmm8, %xmm8
    vpxor %xmm3, %xmm3, %xmm3
    vpxor %xmm4, %xmm4, %xmm4
.Ldec_ret:
    ret
.cfi_endproc
.size AesCcmDecryptAsm, .-AesCcmDecryptAsm
#endif
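
/*
 * Counter stepping, shown in C for reference (illustration only; the function name is
 * hypothetical). The vpshufb / vpaddq / vpshufb sequence used above reverses the
 * 16-byte counter block, adds 1 to its low 64-bit lane, and reverses it back, which
 * is equivalent to incrementing the last eight bytes of the block as one big-endian
 * integer with no carry into the first eight bytes:
 *
 *   #include <stdint.h>
 *
 *   // C equivalent of: vpshufb mask, ctr; vpaddq g_one, ctr; vpshufb mask, ctr
 *   static void CcmCtrInc64Be(uint8_t ctr[16])
 *   {
 *       for (int i = 15; i >= 8; i--) {
 *           if (++ctr[i] != 0) {   // stop as soon as a byte does not wrap to zero
 *               break;
 *           }
 *       }
 *   }
 */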