/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
 *
 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
 *
 * License:
 * This code can be distributed under the terms of the GNU General Public
 * License (GPL) Version 2 provided that the above header down to and
 * including this sentence is retained in full.
 */

/*
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
 * The prototypes below place tfm in %rdi, out in %rsi and in in %rdx,
 * which matches the System V AMD64 calling convention.
 *
 * Lookup tables, defined outside this file.  Each is addressed below as
 * four consecutive 1024-byte sub-tables (TAB, TAB+1024, TAB+2048, TAB+3072)
 * indexed by one byte with a scale of 4.
 */
.extern crypto_ft_tab
.extern crypto_it_tab
.extern crypto_fl_tab
.extern crypto_il_tab

.text

#include <asm/asm-offsets.h>

/* Offset of the cipher context inside the tfm structure (from asm-offsets). */
#define BASE crypto_tfm_ctx_offset

/*
 * Register aliases.  For R1..R4 the suffixes select sub-registers:
 * E = 32 bit, X = 16 bit, H = bits 8..15, L = bits 0..7.
 * R5..R7 only have 64-bit and 32-bit (E) forms; R8..R11 are used as
 * full 64-bit registers only.
 */
#define R1	%rax
#define R1E	%eax
#define R1X	%ax
#define R1H	%ah
#define R1L	%al
#define R2	%rbx
#define R2E	%ebx
#define R2X	%bx
#define R2H	%bh
#define R2L	%bl
#define R3	%rcx
#define R3E	%ecx
#define R3X	%cx
#define R3H	%ch
#define R3L	%cl
#define R4	%rdx
#define R4E	%edx
#define R4X	%dx
#define R4H	%dh
#define R4L	%dl
#define R5	%rsi
#define R5E	%esi
#define R6	%rdi
#define R6E	%edi
#define R7	%rbp
#define R7E	%ebp
#define R8	%r8
#define R9	%r9
#define R10	%r10
#define R11	%r11

/*
 * prologue: emit the global function label FUNC and set up one block.
 *
 * As instantiated by entry() below it
 *   - parks %rbx in %r8 and %rbp in %r9 (both are callee-saved and both
 *     are overwritten by the round code),
 *   - points %r10 at tfm + BASE + KEY + 48 + 4,
 *   - copies the output pointer (%rsi) to %r11,
 *   - loads the four 32-bit input words from (%rdx) into
 *     %eax, %ebx, %ecx, %edx,
 *   - loads the 32-bit value at tfm + BASE + 0 into %esi (compared with
 *     24 below, so presumably the key length in bytes),
 *   - XORs the 16 bytes at tfm + BASE + KEY + 4 into the four words,
 *   - branches: value < 24 -> B128; value == 24 -> B192 with %r10 advanced
 *     by 32; otherwise falls through with %r10 advanced by 64.
 *
 * The two leaq instructions do not modify flags, so the je still tests the
 * result of the cmpl.
 */
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
	.global	FUNC;			\
	.type	FUNC,@function;		\
	.align	8;			\
FUNC:	movq	r1,r2;			\
	movq	r3,r4;			\
	leaq	BASE+KEY+48+4(r8),r9;	\
	movq	r10,r11;		\
	movl	(r7),r5 ## E;		\
	movl	4(r7),r1 ## E;		\
	movl	8(r7),r6 ## E;		\
	movl	12(r7),r7 ## E;		\
	movl	BASE+0(r8),r10 ## E;	\
	xorl	-48(r9),r5 ## E;	\
	xorl	-44(r9),r1 ## E;	\
	xorl	-40(r9),r6 ## E;	\
	xorl	-36(r9),r7 ## E;	\
	cmpl	$24,r10 ## E;		\
	jb	B128;			\
	leaq	32(r9),r9;		\
	je	B192;			\
	leaq	32(r9),r9;

/*
 * epilogue: restore the two parked callee-saved registers, store the four
 * 32-bit result words to the saved output pointer and return.  The
 * trailing .size gives the ELF symbol FUNC its length, complementing the
 * .type directive emitted by prologue.
 */
#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
	movq	r1,r2;			\
	movq	r3,r4;			\
	movl	r5 ## E,(r9);		\
	movl	r6 ## E,4(r9);		\
	movl	r7 ## E,8(r9);		\
	movl	r8 ## E,12(r9);		\
	ret;				\
	.size	FUNC,.-FUNC;

/*
 * round: one table-driven round.
 *
 * Inputs : four 32-bit state words in r1..r4 (all four are consumed).
 * Outputs: four 32-bit words in r5, r6, r3, r4.  Each is the XOR of four
 *          table entries, one from each 1024-byte sub-table of TAB, each
 *          indexed by a single byte taken from the input words, plus one
 *          32-bit word read from OFFSET(r8) .. OFFSET+12(r8).
 * Scratch: r7.
 *
 * ra..rd name the registers that receive the four key words.  Every
 * instantiation in this file passes the output registers (r5, r6, r3, r4)
 * for them.  The first two key words are XORed into ra/rb before those
 * registers have received their final table XORs; this is equivalent
 * because XOR is commutative.
 *
 * The instruction order is interleaved on purpose (registers are reused as
 * soon as their bytes have been extracted); do not reorder.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
	movzbl	r2 ## H,r5 ## E;	\
	movzbl	r2 ## L,r6 ## E;	\
	movl	TAB+1024(,r5,4),r5 ## E;\
	movw	r4 ## X,r2 ## X;	\
	movl	TAB(,r6,4),r6 ## E;	\
	roll	$16,r2 ## E;		\
	shrl	$16,r4 ## E;		\
	movzbl	r4 ## H,r7 ## E;	\
	movzbl	r4 ## L,r4 ## E;	\
	xorl	OFFSET(r8),ra ## E;	\
	xorl	OFFSET+4(r8),rb ## E;	\
	xorl	TAB+3072(,r7,4),r5 ## E;\
	xorl	TAB+2048(,r4,4),r6 ## E;\
	movzbl	r1 ## L,r7 ## E;	\
	movzbl	r1 ## H,r4 ## E;	\
	movl	TAB+1024(,r4,4),r4 ## E;\
	movw	r3 ## X,r1 ## X;	\
	roll	$16,r1 ## E;		\
	shrl	$16,r3 ## E;		\
	xorl	TAB(,r7,4),r5 ## E;	\
	movzbl	r3 ## H,r7 ## E;	\
	movzbl	r3 ## L,r3 ## E;	\
	xorl	TAB+3072(,r7,4),r4 ## E;\
	xorl	TAB+2048(,r3,4),r5 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r3 ## E;	\
	shrl	$16,r1 ## E;		\
	xorl	TAB+3072(,r7,4),r6 ## E;\
	movl	TAB+2048(,r3,4),r3 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r1 ## E;	\
	xorl	TAB+1024(,r7,4),r6 ## E;\
	xorl	TAB(,r1,4),r3 ## E;	\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r7 ## E;	\
	shrl	$16,r2 ## E;		\
	xorl	TAB+3072(,r1,4),r3 ## E;\
	xorl	TAB+2048(,r7,4),r4 ## E;\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r2 ## E;	\
	xorl	OFFSET+8(r8),rc ## E;	\
	xorl	OFFSET+12(r8),rd ## E;	\
	xorl	TAB+1024(,r1,4),r3 ## E;\
	xorl	TAB(,r2,4),r4 ## E;

/* move_regs: copy r3 -> r1 and r4 -> r2 (32-bit moves). */
#define move_regs(r1,r2,r3,r4) \
	movl	r3 ## E,r1 ## E;	\
	movl	r4 ## E,r2 ## E;

/*
 * entry / return: bind prologue and epilogue to the concrete registers.
 *
 *   %r8  <- saved %rbx          %r9  <- saved %rbp
 *   %r10 <- round-key pointer   %r11 <- output pointer
 *   state on entry to the first round: %eax, %ebx, %ecx, %edx
 *   state stored by return:            %esi, %edi, %ecx, %edx
 */
#define entry(FUNC,KEY,B128,B192) \
	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)

#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)

/*
 * A non-final round leaves its result in R5,R6,R3,R4; move_regs copies
 * R5,R6 back into R1,R2 so the next round again finds its input in
 * R1..R4.  The final round skips the copy because return stores directly
 * from R5,R6,R3,R4.
 */
#define encrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

#define encrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)

/*
 * Decryption reuses the same macro with the register pairs (R1,R2),
 * (R3,R4) and (R5,R6) swapped, which changes which input bytes feed which
 * output word (presumably the inverse byte permutation; confirm against
 * FIPS 197).  The key words are still XORed into R5,R6,R3,R4.
 */
#define decrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

#define decrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)

/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */

/*
 * Falling through from entry runs all 14 rounds; enc192 runs 12 and
 * enc128 runs 10.  prologue advances %r10 by 64, 32 or 0 bytes to match,
 * so in every case the first round executed reads the 16 bytes directly
 * after the ones XORed in by prologue.  Round keys are read from offset 4
 * into the context (KEY = 0).
 */
	entry(aes_enc_blk,0,enc128,enc192)
	encrypt_round(crypto_ft_tab,-96)
	encrypt_round(crypto_ft_tab,-80)
enc192:	encrypt_round(crypto_ft_tab,-64)
	encrypt_round(crypto_ft_tab,-48)
enc128:	encrypt_round(crypto_ft_tab,-32)
	encrypt_round(crypto_ft_tab,-16)
	encrypt_round(crypto_ft_tab,  0)
	encrypt_round(crypto_ft_tab, 16)
	encrypt_round(crypto_ft_tab, 32)
	encrypt_round(crypto_ft_tab, 48)
	encrypt_round(crypto_ft_tab, 64)
	encrypt_round(crypto_ft_tab, 80)
	encrypt_round(crypto_ft_tab, 96)
	encrypt_final(crypto_fl_tab,112)
	return(aes_enc_blk)

/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */

/*
 * Same structure as aes_enc_blk, using the crypto_it_tab / crypto_il_tab
 * tables and KEY = 240, i.e. round keys are read starting 240 bytes
 * further into the context than for encryption.
 */
	entry(aes_dec_blk,240,dec128,dec192)
	decrypt_round(crypto_it_tab,-96)
	decrypt_round(crypto_it_tab,-80)
dec192:	decrypt_round(crypto_it_tab,-64)
	decrypt_round(crypto_it_tab,-48)
dec128:	decrypt_round(crypto_it_tab,-32)
	decrypt_round(crypto_it_tab,-16)
	decrypt_round(crypto_it_tab,  0)
	decrypt_round(crypto_it_tab, 16)
	decrypt_round(crypto_it_tab, 32)
	decrypt_round(crypto_it_tab, 48)
	decrypt_round(crypto_it_tab, 64)
	decrypt_round(crypto_it_tab, 80)
	decrypt_round(crypto_it_tab, 96)
	decrypt_final(crypto_il_tab,112)
	return(aes_dec_blk)