// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
//   1. distributions of this source code include the above copyright
//      notice, this list of conditions and the following disclaimer//
//
//   2. distributions in binary form include the above copyright
//      notice, this list of conditions and the following disclaimer
//      in the documentation and/or other associated materials//
//
//   3. the copyright holder's name is not used to endorse products
//      built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>

// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002

.file "aes-i586-asm.S"
.text

#include <asm/asm-offsets.h>

/*
 * Register mapping for the encrypt and decrypt subroutines.
 * Code below names r0..r5; the hardware registers are picked here.
 */
#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi

/* Low-byte names of the four byte-addressable registers. */
#define eaxl al
#define ebxl bl
#define ecxl cl
#define edxl dl

/* Second-byte (bits 8..15) names of the same registers. */
#define eaxh ah
#define ebxh bh
#define ecxh ch
#define edxh dh

/*
 * l(reg) / h(reg) give the low / second byte register of reg.
 * The extra macro level lets an argument such as r0 expand to eax
 * before ## appends the suffix, so l(r0) -> eaxl -> al.
 */
#define paste_l(reg) reg##l
#define paste_h(reg) reg##h
#define l(reg) paste_l(reg)
#define h(reg) paste_h(reg)

/* Length of each of the 4 'xor' arrays: 256 32-bit words = 1024 bytes. */
#define tlen 1024

/* Offsets to the parameters with one register pushed onto the stack. */
#define ctx 8
#define out_blk 12
#define in_blk 16

/*
 * Offsets in the crypto_aes_ctx structure.  These are literal numbers, so
 * they have to agree with the structure's layout, which is declared
 * outside this file.
 */
#define ekey (0)
#define dkey (240)
#define klen (480)

// The do_col macro defined next takes a 32-bit word representing a column
// and uses each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the appropriate
// output registers r0, r1, r4 or r5.

// Parameters of do_col (do_fcol and do_icol take the same ones plus sched):
//   table   base address of the first of four lookup tables that lie
//           tlen bytes apart
//   a1      register xored with the table+0*tlen entry for byte 0 of idx
//   a2      register xored with the table+1*tlen entry for byte 1 of idx
//   a3      register xored with the table+2*tlen entry for byte 2 of idx
//   a4      register xored with the table+3*tlen entry for byte 3 of idx
//   idx     input register for the round (destroyed); its low and second
//           bytes are addressed through l() and h(), so it has to be a
//           register for which those byte names are defined
//   tmp     scratch register for the round
//   sched   key schedule operand (do_fcol and do_icol only)
//
// Bytes 0 and 1 of idx are consumed first, then idx is shifted right by 16
// so that the same l()/h() byte names reach bytes 2 and 3.

#define do_col(table, a1,a2,a3,a4, idx, tmp) \
        movzx   %l(idx),%tmp;                   \
        xor     table(,%tmp,4),%a1;             \
        movzx   %h(idx),%tmp;                   \
        shr     $16,%idx;                       \
        xor     table+tlen(,%tmp,4),%a2;        \
        movzx   %l(idx),%tmp;                   \
        movzx   %h(idx),%idx;                   \
        xor     table+2*tlen(,%tmp,4),%a3;      \
        xor     table+3*tlen(,%idx,4),%a4;

// Forward-round variant of do_col that also initialises the output
// registers from the key schedule: a1 is loaded from sched+0, a2 from
// sched+12, a3 from sched+8 and a4 from sched+4 before the table values are
// xored in.  The byte offset is written directly in front of sched
// ("12 sched"), so sched has to be a memory operand whose text starts with
// a sign or a parenthesis, e.g. -64(%ebp), (%ebp) or +16(%ebp).
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
        mov     0 sched,%a1;                    \
        movzx   %l(idx),%tmp;                   \
        mov     12 sched,%a2;                   \
        xor     table(,%tmp,4),%a1;             \
        mov     4 sched,%a4;                    \
        movzx   %h(idx),%tmp;                   \
        shr     $16,%idx;                       \
        xor     table+tlen(,%tmp,4),%a2;        \
        movzx   %l(idx),%tmp;                   \
        movzx   %h(idx),%idx;                   \
        xor     table+3*tlen(,%idx,4),%a4;      \
        mov     %a3,%idx;                       \
        mov     8 sched,%a3;                    \
        xor     table+2*tlen(,%tmp,4),%a3;

// Inverse-round variant: identical to do_fcol except for which key
// schedule words seed a2 and a4 (a2 from sched+4, a4 from sched+12).
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
        mov     0 sched,%a1;                    \
        movzx   %l(idx),%tmp;                   \
        mov     4 sched,%a2;                    \
        xor     table(,%tmp,4),%a1;             \
        mov     12 sched,%a4;                   \
        movzx   %h(idx),%tmp;                   \
        shr     $16,%idx;                       \
        xor     table+tlen(,%tmp,4),%a2;        \
        movzx   %l(idx),%tmp;                   \
        movzx   %h(idx),%idx;                   \
        xor     table+3*tlen(,%idx,4),%a4;      \
        mov     %a3,%idx;                       \
        mov     8 sched,%a3;                    \
        xor     table+2*tlen(,%tmp,4),%a3;


// original Gladman had conditional saves to MMX regs.
// save(slot, reg): store register reg in 4-byte stack slot number slot,
// i.e. at 4*slot(%esp).
#define save(a1, a2) \
        mov     %a2,4*a1(%esp)

// restore(reg, slot): load register reg from 4-byte stack slot number slot.
// Note that the argument order is the reverse of save().
#define restore(a1, a2) \
        mov     4*a2(%esp),%a1

// These macros perform a forward encryption cycle.
// They are entered with the previous round's column values in four
// registers and exit with the new column values in four registers, using
// stack slots 0 and 1 (see save/restore) for temporary storage.
// fwd_rnd1 and fwd_rnd2 differ only in which of r0/r2 carries the first
// column: fwd_rnd1 takes it in r0 and leaves it in r2, fwd_rnd2 takes it in
// r2 and leaves it in r0, so a fwd_rnd1/fwd_rnd2 pair ends with the state
// back in r0,r1,r4,r5.  r3 is the scratch register in both.
//
//   arg     round key operand, handed to do_fcol as its sched argument
//   table   lookup table base, handed to do_fcol/do_col

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define fwd_rnd1(arg, table) \
        save   (0,r1);                                          \
        save   (1,r5);                                          \
                                                                \
        /* compute new column values */                         \
        do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */   \
        do_col (table, r4,r1,r2,r5, r0,r3);      /* idx=r4 */   \
        restore(r0,0);                                          \
        do_col (table, r1,r2,r5,r4, r0,r3);      /* idx=r1 */   \
        restore(r0,1);                                          \
        do_col (table, r5,r4,r1,r2, r0,r3);      /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define fwd_rnd2(arg, table) \
        save   (0,r1);                                          \
        save   (1,r5);                                          \
                                                                \
        /* compute new column values */                         \
        do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */   \
        do_col (table, r4,r1,r0,r5, r2,r3);      /* idx=r4 */   \
        restore(r2,0);                                          \
        do_col (table, r1,r0,r5,r4, r2,r3);      /* idx=r1 */   \
        restore(r2,1);                                          \
        do_col (table, r5,r4,r1,r0, r2,r3);      /* idx=r5 */

// These macros perform an inverse encryption cycle.
// They are entered with the previous round's column values in four
// registers and exit with the new column values in four registers, using
// stack slots 0 and 1 (see save/restore) for temporary storage.
// inv_rnd1 and inv_rnd2 differ only in which of r0/r2 carries the first
// column: inv_rnd1 takes it in r0 and leaves it in r2, inv_rnd2 takes it in
// r2 and leaves it in r0, so an inv_rnd1/inv_rnd2 pair ends with the state
// back in r0,r1,r4,r5.  r3 is the scratch register in both.
//
//   arg     round key operand, handed to do_icol as its sched argument
//   table   lookup table base, handed to do_icol/do_col

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define inv_rnd1(arg, table) \
        save   (0,r1);                                          \
        save   (1,r5);                                          \
                                                                \
        /* compute new column values */                         \
        do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */   \
        do_col (table, r4,r5,r2,r1, r0,r3);      /* idx=r4 */   \
        restore(r0,0);                                          \
        do_col (table, r1,r4,r5,r2, r0,r3);      /* idx=r1 */   \
        restore(r0,1);                                          \
        do_col (table, r5,r2,r1,r4, r0,r3);      /* idx=r5 */

// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define inv_rnd2(arg, table) \
        save   (0,r1);                                          \
        save   (1,r5);                                          \
                                                                \
        /* compute new column values */                         \
        do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */   \
        do_col (table, r4,r5,r0,r1, r2,r3);      /* idx=r4 */   \
        restore(r2,0);                                          \
        do_col (table, r1,r4,r5,r0, r2,r3);      /* idx=r1 */   \
        restore(r2,1);                                          \
        do_col (table, r5,r0,r1,r4, r2,r3);      /* idx=r5 */

// -------------------------------------------------------------------------
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// In:     all three arguments are read from the stack (ctx, out_blk and
//         in_blk are their offsets with one register pushed).
// Out:    16 bytes stored at out_blk as four 32-bit words; no return value
//         is set up.
// Saved:  ebp, ebx, esi, edi are pushed on entry and popped before ret.
// Clobb:  eax, ecx, edx, flags.
// Stack:  four pushes plus 8 bytes of scratch, giving this layout while the
//         rounds run:
//           0(%esp), 4(%esp)   slots 0 and 1 used by save/restore
//           8(%esp)...         saved edi, esi, ebx, ebp, return address,
//                              then ctx, out_blk, in_blk
// Regs:   r0,r1,r4,r5 hold the four columns between round pairs, r2 is
//         the input pointer and later the alternate first-column register,
//         r3 holds the key size and later is the round scratch register,
//         ebp is the round key pointer.
// -------------------------------------------------------------------------

.global aes_enc_blk
.type   aes_enc_blk,@function   // emit a typed function symbol (size set at end)

.extern crypto_ft_tab
.extern crypto_fl_tab

.align 4

aes_enc_blk:
        push    %ebp
        mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings
// (labels 1 and 2 are not the target of any branch visible in this file)

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: a second register is now pushed
        push    %esi
        mov     klen(%ebp),%r3          // key size (read before ebp moves)
        push    %edi
#if ekey != 0
        lea     ekey(%ebp),%ebp         // key pointer
#endif

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

        sub     $8,%esp                 // space for register saves on stack
        add     $16,%ebp                // increment to next round key

// ebp is biased so that the last ten rounds always use (%ebp)..+144(%ebp);
// longer keys advance ebp by 32 or 64 and enter earlier at a negative
// offset.  lea is used for the advance because it leaves the flags of the
// cmp intact for the following je.
        cmp     $24,%r3
        jb      4f                      // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp
        je      3f                      // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp

2:      fwd_rnd1( -64(%ebp), crypto_ft_tab)     // 14 rounds for 256-bit key
        fwd_rnd2( -48(%ebp), crypto_ft_tab)
3:      fwd_rnd1( -32(%ebp), crypto_ft_tab)     // 12 rounds for 192-bit key
        fwd_rnd2( -16(%ebp), crypto_ft_tab)
4:      fwd_rnd1(    (%ebp), crypto_ft_tab)     // 10 rounds for 128-bit key
        fwd_rnd2( +16(%ebp), crypto_ft_tab)
        fwd_rnd1( +32(%ebp), crypto_ft_tab)
        fwd_rnd2( +48(%ebp), crypto_ft_tab)
        fwd_rnd1( +64(%ebp), crypto_ft_tab)
        fwd_rnd2( +80(%ebp), crypto_ft_tab)
        fwd_rnd1( +96(%ebp), crypto_ft_tab)
        fwd_rnd2(+112(%ebp), crypto_ft_tab)
        fwd_rnd1(+128(%ebp), crypto_ft_tab)
        fwd_rnd2(+144(%ebp), crypto_fl_tab)     // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

        add     $8,%esp                 // drop the two save/restore slots
        mov     out_blk+12(%esp),%ebp   // +12: three more registers are pushed
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        ret
.size   aes_enc_blk,.-aes_enc_blk

// -------------------------------------------------------------------------
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Same argument passing, stack layout, saved/clobbered registers and round
// key biasing as aes_enc_blk.  The differences are that the key pointer is
// ctx + dkey, the rounds are inv_rnd1/inv_rnd2, and the tables are
// crypto_it_tab / crypto_il_tab.
// -------------------------------------------------------------------------

.global aes_dec_blk
.type   aes_dec_blk,@function   // emit a typed function symbol (size set at end)

.extern crypto_it_tab
.extern crypto_il_tab

.align 4

aes_dec_blk:
        push    %ebp
        mov     ctx(%esp),%ebp

// CAUTION: the order and the values used in these assigns
// rely on the register mappings
// (labels 1 and 2 are not the target of any branch visible in this file)

1:      push    %ebx
        mov     in_blk+4(%esp),%r2      // +4: a second register is now pushed
        push    %esi
        mov     klen(%ebp),%r3          // key size (read before ebp moves)
        push    %edi
#if dkey != 0
        lea     dkey(%ebp),%ebp         // key pointer
#endif

// input four columns and xor in first round key

        mov     (%r2),%r0
        mov     4(%r2),%r1
        mov     8(%r2),%r4
        mov     12(%r2),%r5
        xor     (%ebp),%r0
        xor     4(%ebp),%r1
        xor     8(%ebp),%r4
        xor     12(%ebp),%r5

        sub     $8,%esp                 // space for register saves on stack
        add     $16,%ebp                // increment to next round key

// ebp is biased exactly as in aes_enc_blk; lea keeps the cmp flags alive
// for the je.
        cmp     $24,%r3
        jb      4f                      // 10 rounds for 128-bit key
        lea     32(%ebp),%ebp
        je      3f                      // 12 rounds for 192-bit key
        lea     32(%ebp),%ebp

2:      inv_rnd1( -64(%ebp), crypto_it_tab)     // 14 rounds for 256-bit key
        inv_rnd2( -48(%ebp), crypto_it_tab)
3:      inv_rnd1( -32(%ebp), crypto_it_tab)     // 12 rounds for 192-bit key
        inv_rnd2( -16(%ebp), crypto_it_tab)
4:      inv_rnd1(    (%ebp), crypto_it_tab)     // 10 rounds for 128-bit key
        inv_rnd2( +16(%ebp), crypto_it_tab)
        inv_rnd1( +32(%ebp), crypto_it_tab)
        inv_rnd2( +48(%ebp), crypto_it_tab)
        inv_rnd1( +64(%ebp), crypto_it_tab)
        inv_rnd2( +80(%ebp), crypto_it_tab)
        inv_rnd1( +96(%ebp), crypto_it_tab)
        inv_rnd2(+112(%ebp), crypto_it_tab)
        inv_rnd1(+128(%ebp), crypto_it_tab)
        inv_rnd2(+144(%ebp), crypto_il_tab)     // last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

        add     $8,%esp                 // drop the two save/restore slots
        mov     out_blk+12(%esp),%ebp   // +12: three more registers are pushed
        mov     %r5,12(%ebp)
        pop     %edi
        mov     %r4,8(%ebp)
        pop     %esi
        mov     %r1,4(%ebp)
        pop     %ebx
        mov     %r0,(%ebp)
        pop     %ebp
        ret
.size   aes_dec_blk,.-aes_dec_blk