• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ---------------------------------------------------------------------------
3  Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4 
5  LICENSE TERMS
6 
7  The redistribution and use of this software (with or without changes)
8  is allowed without the payment of fees or royalties provided that:
9 
10   1. source code distributions include the above copyright notice, this
11      list of conditions and the following disclaimer;
12 
13   2. binary distributions include the above copyright notice, this list
14      of conditions and the following disclaimer in their documentation;
15 
16   3. the name of the copyright holder is not used to endorse products
17      built using this software without specific written permission.
18 
19  DISCLAIMER
20 
21  This software is provided 'as is' with no explicit or implied warranties
22  in respect of its properties, including, but not limited to, correctness
23  and/or fitness for purpose.
24  ---------------------------------------------------------------------------
25  Issue 09/09/2006
26 
27  This is an AES implementation that uses only 8-bit byte operations on the
28  cipher state (there are options to use 32-bit types if available).
29 
30  The combination of mix columns and byte substitution used here is based on
31  that developed by Karl Malbrain. His contribution is acknowledged.
32  */
33 
34 /* define if you have a fast memcpy function on your system */
35 #if 1
36 #  define HAVE_MEMCPY
37 #  include <string.h>
38 #if 0
39 #  if defined( _MSC_VER )
40 #    include <intrin.h>
41 #    pragma intrinsic( memcpy )
42 #  endif
43 #endif
44 #endif
45 
#include <stdint.h>
#include <stdlib.h>
47 
48 /* define if you have fast 32-bit types on your system */
49 #if 1
50 #  define HAVE_UINT_32T
51 #endif
52 
53 /* define if you don't want any tables */
54 #if 1
55 #  define USE_TABLES
56 #endif
57 
58 /*  On Intel Core 2 duo VERSION_1 is faster */
59 
60 /* alternative versions (test for performance on your system) */
61 #if 1
62 #  define VERSION_1
63 #endif
64 
65 #include "aes.h"
66 
#if defined( HAVE_UINT_32T )
  /* This type must be exactly 32 bits wide: the HAVE_UINT_32T code paths
     below access the 16 byte block as four of these.  The previous
     'unsigned long' is 64 bits on LP64 platforms (Linux/macOS 64-bit),
     which would make those paths read and write 32 bytes per block and
     overrun the state and key buffers.  Use the fixed-width type from
     <stdint.h> instead.                                                 */
  typedef uint32_t uint_32t;
#endif
70 
71 /* functions for finite field multiplication in the AES Galois field    */
72 
/* Field polynomials: WPOLY is the AES polynomial x^8+x^4+x^3+x+1 kept in
   9 bits for in-word reduction; BPOLY is its low byte; DPOLY is the
   "divide by x" reduction constant used by d2().                        */
#define WPOLY   0x011b
#define BPOLY     0x1b
#define DPOLY   0x008d

/* GF(2^8) multiplication of a byte value by small constants.  The
   '(((x) >> n) & m) * WPOLY' terms conditionally XOR in shifted copies
   of the field polynomial to reduce the product back into 8 bits.
   All macro arguments are fully parenthesised so that expression
   arguments (e.g. f2(a ^ b)) expand correctly; note that each argument
   is still evaluated more than once, so avoid side-effecting arguments. */
#define f1(x)   (x)
#define f2(x)   (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
#define f4(x)   (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
#define f8(x)   (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) \
                            ^ ((((x) >> 5) & 4) * WPOLY))
#define d2(x)   (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))    /* divide by x (02) */

/* Multiplication by the remaining MixColumns coefficients, built from
   the powers of two above.                                              */
#define f3(x)   (f2(x) ^ (x))
#define f9(x)   (f8(x) ^ (x))
#define fb(x)   (f8(x) ^ f2(x) ^ (x))
#define fd(x)   (f8(x) ^ f4(x) ^ (x))
#define fe(x)   (f8(x) ^ f4(x) ^ f2(x))
89 
90 #if defined( USE_TABLES )
91 
92 #define sb_data(w) {    /* S Box data values */                            \
93     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
94     w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
95     w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
96     w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
97     w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
98     w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
99     w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
100     w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
101     w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
102     w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
103     w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
104     w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
105     w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
106     w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
107     w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
108     w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
109     w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
110     w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
111     w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
112     w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
113     w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
114     w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
115     w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
116     w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
117     w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
118     w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
119     w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
120     w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
121     w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
122     w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
123     w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
124     w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
125 
126 #define isb_data(w) {   /* inverse S Box data values */                    \
127     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
128     w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
129     w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
130     w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
131     w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
132     w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
133     w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
134     w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
135     w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
136     w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
137     w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
138     w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
139     w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
140     w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
141     w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
142     w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
143     w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
144     w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
145     w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
146     w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
147     w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
148     w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
149     w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
150     w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
151     w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
152     w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
153     w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
154     w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
155     w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
156     w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
157     w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
158     w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
159 
160 #define mm_data(w) {    /* basic data for forming finite field tables */   \
161     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
162     w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
163     w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
164     w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
165     w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
166     w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
167     w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
168     w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
169     w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
170     w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
171     w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
172     w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
173     w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
174     w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
175     w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
176     w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
177     w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
178     w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
179     w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
180     w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
181     w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
182     w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
183     w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
184     w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
185     w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
186     w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
187     w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
188     w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
189     w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
190     w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
191     w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
192     w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
193 
/* The AES S-box and inverse S-box, expanded from the data macros above. */
static const uint_8t sbox[256]  =  sb_data(f1);
static const uint_8t isbox[256] = isb_data(f1);

/* S-box outputs pre-multiplied by 2 and 3 in GF(2^8): used to fuse
   SubBytes with the forward MixColumns in mix_sub_columns().            */
static const uint_8t gfm2_sbox[256] = sb_data(f2);
static const uint_8t gfm3_sbox[256] = sb_data(f3);

/* Plain GF(2^8) multiplication tables by 9, 0x0b, 0x0d and 0x0e: used
   by the inverse MixColumns in inv_mix_sub_columns().                   */
static const uint_8t gfmul_9[256] = mm_data(f9);
static const uint_8t gfmul_b[256] = mm_data(fb);
static const uint_8t gfmul_d[256] = mm_data(fd);
static const uint_8t gfmul_e[256] = mm_data(fe);

/* Table-lookup forms of the field operations; the #else branch below
   provides computed equivalents for the table-free build.               */
#define s_box(x)     sbox[(x)]
#define is_box(x)    isbox[(x)]
#define gfm2_sb(x)   gfm2_sbox[(x)]
#define gfm3_sb(x)   gfm3_sbox[(x)]
#define gfm_9(x)     gfmul_9[(x)]
#define gfm_b(x)     gfmul_b[(x)]
#define gfm_d(x)     gfmul_d[(x)]
#define gfm_e(x)     gfmul_e[(x)]
213 
214 #else
215 
216 /* this is the high bit of x right shifted by 1 */
217 /* position. Since the starting polynomial has  */
218 /* 9 bits (0x11b), this right shift keeps the   */
219 /* values of all top bits within a byte         */
220 
/* Return the highest set bit of x shifted right by one position
   (e.g. 0x80 -> 0x40, 0x57 -> 0x20); returns 0 for x < 2.  The ORs
   smear the top bit of x>>1 into every lower position, so r+1 is a
   single power of two one place above the result.                   */
static uint_8t hibit(const uint_8t x)
{   uint_8t r = (uint_8t)((x >> 1) | (x >> 2));

    r |= (r >> 2);          /* propagate the top bit downwards */
    r |= (r >> 4);
    return (r + 1) >> 1;    /* isolate it, pre-shifted right once */
}
228 
229 /* return the inverse of the finite field element x */
230 
/* Return the multiplicative inverse of x in GF(2^8), computed with the
   extended Euclidean algorithm on polynomials packed into bytes.  The
   quotient of a polynomial division step is always a single power of x
   here (n2/n1 with n1, n2 the half-shifted leading bits from hibit()),
   so multiply-by-quotient is an ordinary integer multiply by a power
   of two.  gf_inv(0) returns 0 by the x < 2 short-circuit.             */
static uint_8t gf_inv(const uint_8t x)
{   uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

    if(x < 2)
        return x;

    for( ; ; )
    {
        if(n1)
            while(n2 >= n1)             /* divide polynomial p2 by p1    */
            {
                n2 /= n1;               /* shift smaller polynomial left */
                p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
                v2 ^= (v1 * n2);        /* shift accumulated value and   */
                n2 = hibit(p2);         /* add into result               */
            }
        else
            return v1;

        if(n2)                          /* repeat with values swapped    */
            while(n1 >= n2)
            {
                n1 /= n2;
                p1 ^= p2 * n1;
                v1 ^= v2 * n1;
                n1 = hibit(p1);
            }
        else
            return v2;
    }
}
262 
263 /* The forward and inverse affine transformations used in the S-box */
/* The forward affine transformation of the AES S-box: XOR of the input
   with four of its bit-rotations, plus the constant 0x63.              */
uint_8t fwd_affine(const uint_8t x)
{
#if defined( HAVE_UINT_32T )
    uint_32t w = x;
    w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);  /* shifts in a wide word... */
    return 0x63 ^ ((w ^ (w >> 8)) & 0xff);           /* ...then fold overflow bits
                                                        back to emulate rotation */
#else
    return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4)
                    ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^ (x >> 4);
#endif
}
275 
/* The inverse affine transformation: XOR of rotations by 1, 3 and 6
   bits plus the constant 0x05.  Undoes fwd_affine().                  */
uint_8t inv_affine(const uint_8t x)
{
#if defined( HAVE_UINT_32T )
    uint_32t w = x;
    w = (w << 1) ^ (w << 3) ^ (w << 6);   /* wide shifts, folded below */
    return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
#else
    return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6)
                ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
#endif
}
287 
288 #define s_box(x)   fwd_affine(gf_inv(x))
289 #define is_box(x)  gf_inv(inv_affine(x))
290 #define gfm2_sb(x) f2(s_box(x))
291 #define gfm3_sb(x) f3(s_box(x))
292 #define gfm_9(x)   f9(x)
293 #define gfm_b(x)   fb(x)
294 #define gfm_d(x)   fd(x)
295 #define gfm_e(x)   fe(x)
296 
297 #endif
298 
/* Block-copy helpers: resolve to the library memcpy when available,
   otherwise to the local copy loops defined below.                    */
#if defined( HAVE_MEMCPY )
#  define block_copy_nn(d, s, l)    memcpy(d, s, l)
#  define block_copy(d, s)          memcpy(d, s, N_BLOCK)
#else
#  define block_copy_nn(d, s, l)    copy_block_nn(d, s, l)
#  define block_copy(d, s)          copy_block(d, s)
#endif
306 
307 #if !defined( HAVE_MEMCPY )
/* Copy one 16 byte block from s to d (regions must not overlap).
   NOTE(review): the 32-bit path accesses byte buffers through
   uint_32t* — this assumes the state/key buffers are 4-byte aligned
   and tolerates type-punning; confirm on strict-alignment targets.   */
static void copy_block( void *d, const void *s )
{
#if defined( HAVE_UINT_32T )
    ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0];
    ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1];
    ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2];
    ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3];
#else
    ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0];
    ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1];
    ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2];
    ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3];
    ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4];
    ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5];
    ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6];
    ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7];
    ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8];
    ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9];
    ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
    ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
    ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
    ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
    ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
    ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
#endif
}
334 
/* Copy nn bytes from s to d (regions must not overlap).  The original
   wrote '*((uint_8t*)d)++' — a cast is not an lvalue in C, so that is
   a constraint violation rejected by conforming compilers; walk local
   pointers instead.                                                    */
static void copy_block_nn( void *d, const void *s, uint_8t nn )
{
    uint_8t *dp = (uint_8t*)d;
    const uint_8t *sp = (const uint_8t*)s;

    while( nn-- )
        *dp++ = *sp++;
}
340 #endif
341 
/* XOR the 16 byte block at s into the block at d.
   NOTE(review): as with copy_block, the 32-bit path assumes 4-byte
   alignment of both buffers; confirm on strict-alignment targets.    */
static void xor_block( void *d, const void *s )
{
#if defined( HAVE_UINT_32T )
    ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0];
    ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1];
    ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2];
    ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3];
#else
    ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0];
    ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1];
    ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2];
    ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3];
    ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4];
    ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5];
    ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6];
    ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7];
    ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8];
    ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9];
    ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
    ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
    ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
    ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
    ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
    ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
#endif
}
368 
/* d = s XOR k for one 16 byte block: combined state copy and round key
   addition.  The '#elif 1' selects the unrolled byte path over the
   block_copy/xor_block fallback in the final #else.                    */
static void copy_and_key( void *d, const void *s, const void *k )
{
#if defined( HAVE_UINT_32T )
    ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0];
    ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1];
    ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2];
    ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3];
#elif 1
    ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0];
    ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1];
    ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2];
    ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3];
    ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4];
    ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5];
    ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6];
    ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7];
    ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8];
    ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9];
    ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
    ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
    ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
    ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
    ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
    ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
#else
    block_copy(d, s);
    xor_block(d, k);
#endif
}
398 
/* AddRoundKey: XOR the 16 byte round key k into the state d in place. */
static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] )
{
    xor_block(d, k);
}
403 
/* Combined SubBytes + ShiftRows on the state st, in place.  The state
   is column-major, so row r occupies indices r, r+4, r+8, r+12 and is
   rotated left by r positions while each byte passes through the S-box. */
static void shift_sub_rows( uint_8t st[N_BLOCK] )
{
    uint_8t save;

    /* row 0: substitution only, no rotation */
    st[ 0] = s_box(st[ 0]);
    st[ 4] = s_box(st[ 4]);
    st[ 8] = s_box(st[ 8]);
    st[12] = s_box(st[12]);

    /* row 1: rotate left one position */
    save   = st[ 1];
    st[ 1] = s_box(st[ 5]);
    st[ 5] = s_box(st[ 9]);
    st[ 9] = s_box(st[13]);
    st[13] = s_box(save);

    /* row 2: rotate two positions — two independent swaps */
    save   = st[ 2];
    st[ 2] = s_box(st[10]);
    st[10] = s_box(save);
    save   = st[ 6];
    st[ 6] = s_box(st[14]);
    st[14] = s_box(save);

    /* row 3: rotate left three, i.e. right one position */
    save   = st[15];
    st[15] = s_box(st[11]);
    st[11] = s_box(st[ 7]);
    st[ 7] = s_box(st[ 3]);
    st[ 3] = s_box(save);
}
419 
/* Combined inverse SubBytes + inverse ShiftRows on st, in place.  Row r
   is rotated right by r positions (undoing shift_sub_rows) with each
   byte passed through the inverse S-box.                               */
static void inv_shift_sub_rows( uint_8t st[N_BLOCK] )
{
    uint_8t save;

    /* row 0: inverse substitution only */
    st[ 0] = is_box(st[ 0]);
    st[ 4] = is_box(st[ 4]);
    st[ 8] = is_box(st[ 8]);
    st[12] = is_box(st[12]);

    /* row 1: rotate right one position */
    save   = st[13];
    st[13] = is_box(st[ 9]);
    st[ 9] = is_box(st[ 5]);
    st[ 5] = is_box(st[ 1]);
    st[ 1] = is_box(save);

    /* row 2: rotate two positions — two independent swaps */
    save   = st[ 2];
    st[ 2] = is_box(st[10]);
    st[10] = is_box(save);
    save   = st[ 6];
    st[ 6] = is_box(st[14]);
    st[14] = is_box(save);

    /* row 3: rotate right three, i.e. left one position */
    save   = st[ 3];
    st[ 3] = is_box(st[ 7]);
    st[ 7] = is_box(st[11]);
    st[11] = is_box(st[15]);
    st[15] = is_box(save);
}
435 
#if defined( VERSION_1 )
  /* Fused SubBytes + ShiftRows + MixColumns for one forward round
     (Malbrain construction).  VERSION_1 works in place on dt[], first
     saving a copy of the state so source bytes stay readable while the
     new column values are written.                                      */
  static void mix_sub_columns( uint_8t dt[N_BLOCK] )
  { uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
  /* Two-buffer variant: reads st[] and writes the round result to dt[]. */
  static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
  {
#endif
    /* The source index pattern (0,5,10,15 / 4,9,14,3 / ...) performs   */
    /* ShiftRows; the 2,3,1,1 coefficient rotation per output byte is   */
    /* MixColumns; gfm2_sb/gfm3_sb fold the S-box into the multiply.    */
    dt[ 0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
    dt[ 1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
    dt[ 2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
    dt[ 3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

    dt[ 4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
    dt[ 5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
    dt[ 6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
    dt[ 7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

    dt[ 8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
    dt[ 9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
    dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
    dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

    dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
    dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
    dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
    dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
  }
464 
#if defined( VERSION_1 )
  /* Fused inverse MixColumns + inverse ShiftRows + inverse SubBytes for
     one decryption round.  VERSION_1 works in place on dt[] via a saved
     copy of the state.                                                  */
  static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] )
  { uint_8t st[N_BLOCK];
    block_copy(st, dt);
#else
  /* Two-buffer variant: reads st[] and writes the round result to dt[]. */
  static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
  {
#endif
    /* Inverse MixColumns uses the e,b,d,9 coefficient rotation; the    */
    /* scattered destination indices perform inverse ShiftRows, and the */
    /* outer is_box() applies the inverse S-box after the mix.          */
    dt[ 0] = is_box(gfm_e(st[ 0]) ^ gfm_b(st[ 1]) ^ gfm_d(st[ 2]) ^ gfm_9(st[ 3]));
    dt[ 5] = is_box(gfm_9(st[ 0]) ^ gfm_e(st[ 1]) ^ gfm_b(st[ 2]) ^ gfm_d(st[ 3]));
    dt[10] = is_box(gfm_d(st[ 0]) ^ gfm_9(st[ 1]) ^ gfm_e(st[ 2]) ^ gfm_b(st[ 3]));
    dt[15] = is_box(gfm_b(st[ 0]) ^ gfm_d(st[ 1]) ^ gfm_9(st[ 2]) ^ gfm_e(st[ 3]));

    dt[ 4] = is_box(gfm_e(st[ 4]) ^ gfm_b(st[ 5]) ^ gfm_d(st[ 6]) ^ gfm_9(st[ 7]));
    dt[ 9] = is_box(gfm_9(st[ 4]) ^ gfm_e(st[ 5]) ^ gfm_b(st[ 6]) ^ gfm_d(st[ 7]));
    dt[14] = is_box(gfm_d(st[ 4]) ^ gfm_9(st[ 5]) ^ gfm_e(st[ 6]) ^ gfm_b(st[ 7]));
    dt[ 3] = is_box(gfm_b(st[ 4]) ^ gfm_d(st[ 5]) ^ gfm_9(st[ 6]) ^ gfm_e(st[ 7]));

    dt[ 8] = is_box(gfm_e(st[ 8]) ^ gfm_b(st[ 9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
    dt[13] = is_box(gfm_9(st[ 8]) ^ gfm_e(st[ 9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
    dt[ 2] = is_box(gfm_d(st[ 8]) ^ gfm_9(st[ 9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
    dt[ 7] = is_box(gfm_b(st[ 8]) ^ gfm_d(st[ 9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

    dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
    dt[ 1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
    dt[ 6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
    dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
  }
493 
494 #if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED )
495 
496 /*  Set the cipher key for the pre-keyed version */
497 
/* Expand the cipher key into ctx->ksch and set ctx->rnd to the round
   count.  keylen is accepted in bytes (16/24/32) or bits (128/192/256).
   Returns 0 on success, (return_type)-1 on a bad length (with
   ctx->rnd = 0 so aes_encrypt/aes_decrypt will refuse the context).    */
return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] )
{
    uint_8t cc, rc, hi;

    switch( keylen )
    {
    case 16:
    case 128:
        keylen = 16;
        break;
    case 24:
    case 192:
        keylen = 24;
        break;
    case 32:
    case 256:
        keylen = 32;
        break;
    default:
        ctx->rnd = 0;
        return (return_type)-1;
    }
    block_copy_nn(ctx->ksch, key, keylen);
    /* total schedule size: 16*(rounds+1) bytes, e.g. 176 for AES-128 */
    hi = (keylen + 28) << 2;
    ctx->rnd = (hi >> 4) - 1;
    for( cc = keylen, rc = 1; cc < hi; cc += 4 )
    {   uint_8t tt, t0, t1, t2, t3;

        t0 = ctx->ksch[cc - 4];
        t1 = ctx->ksch[cc - 3];
        t2 = ctx->ksch[cc - 2];
        t3 = ctx->ksch[cc - 1];
        if( cc % keylen == 0 )          /* g-function: rotate, sub, rcon */
        {
            tt = t0;
            t0 = s_box(t1) ^ rc;
            t1 = s_box(t2);
            t2 = s_box(t3);
            t3 = s_box(tt);
            rc = f2(rc);                /* step round constant in GF(2^8) */
        }
        else if( keylen > 24 && cc % keylen == 16 )
        {                               /* extra SubWord for 256-bit keys */
            t0 = s_box(t0);
            t1 = s_box(t1);
            t2 = s_box(t2);
            t3 = s_box(t3);
        }
        tt = cc - keylen;
        ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
        ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
        ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
        ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
    }
    return 0;
}
554 
555 #endif
556 
557 #if defined( AES_ENC_PREKEYED )
558 
559 /*  Encrypt a single block of 16 bytes */
560 
/* Encrypt one 16 byte block with a pre-expanded key schedule.
   in and out may alias.  Returns 0 on success, (return_type)-1 if the
   context has not been keyed (ctx->rnd == 0).                          */
return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char  out[N_BLOCK], const aes_context ctx[1] )
{
    if( ctx->rnd )
    {
        uint_8t s1[N_BLOCK], r;
        copy_and_key( s1, in, ctx->ksch );          /* initial AddRoundKey */

        for( r = 1 ; r < ctx->rnd ; ++r )           /* middle rounds */
#if defined( VERSION_1 )
        {
            mix_sub_columns( s1 );
            add_round_key( s1, ctx->ksch + r * N_BLOCK);
        }
#else
        {   uint_8t s2[N_BLOCK];
            mix_sub_columns( s2, s1 );
            copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK);
        }
#endif
        /* final round has no MixColumns; r == ctx->rnd here */
        shift_sub_rows( s1 );
        copy_and_key( out, s1, ctx->ksch + r * N_BLOCK );
    }
    else
        return (return_type)-1;
    return 0;
}
587 
588 /* CBC encrypt a number of blocks (input and return an IV) */
589 
590 return_type aes_cbc_encrypt( const unsigned char *in, unsigned char *out,
591                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
592 {
593 
594     while(n_block--)
595     {
596         xor_block(iv, in);
597         if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
598 			return EXIT_FAILURE;
599         memcpy(out, iv, N_BLOCK);
600         in += N_BLOCK;
601         out += N_BLOCK;
602     }
603     return EXIT_SUCCESS;
604 }
605 
606 #endif
607 
608 #if defined( AES_DEC_PREKEYED )
609 
610 /*  Decrypt a single block of 16 bytes */
611 
/* Decrypt one 16 byte block with a pre-expanded key schedule.
   in and out may alias.  Returns 0 on success, (return_type)-1 if the
   context has not been keyed (ctx->rnd == 0).                          */
return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] )
{
    if( ctx->rnd )
    {
        uint_8t s1[N_BLOCK], r;
        /* undo the final round first: last round key, then inverse rows */
        copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK );
        inv_shift_sub_rows( s1 );

        for( r = ctx->rnd ; --r ; )         /* middle rounds, in reverse */
#if defined( VERSION_1 )
        {
            add_round_key( s1, ctx->ksch + r * N_BLOCK );
            inv_mix_sub_columns( s1 );
        }
#else
        {   uint_8t s2[N_BLOCK];
            copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK );
            inv_mix_sub_columns( s1, s2 );
        }
#endif
        copy_and_key( out, s1, ctx->ksch ); /* undo the initial AddRoundKey */
    }
    else
        return (return_type)-1;
    return 0;
}
638 
639 /* CBC decrypt a number of blocks (input and return an IV) */
640 
641 return_type aes_cbc_decrypt( const unsigned char *in, unsigned char *out,
642                          int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
643 {
644     while(n_block--)
645     {   uint_8t tmp[N_BLOCK];
646 
647         memcpy(tmp, in, N_BLOCK);
648         if(aes_decrypt(in, out, ctx) != EXIT_SUCCESS)
649 			return EXIT_FAILURE;
650         xor_block(out, iv);
651         memcpy(iv, tmp, N_BLOCK);
652         in += N_BLOCK;
653         out += N_BLOCK;
654     }
655     return EXIT_SUCCESS;
656 }
657 
658 #endif
659 
660 #if defined( AES_ENC_128_OTFK )
661 
662 /*  The 'on the fly' encryption key update for for 128 bit keys */
663 
664 static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
665 {   uint_8t cc;
666 
667     k[0] ^= s_box(k[13]) ^ *rc;
668     k[1] ^= s_box(k[14]);
669     k[2] ^= s_box(k[15]);
670     k[3] ^= s_box(k[12]);
671     *rc = f2( *rc );
672 
673     for(cc = 4; cc < 16; cc += 4 )
674     {
675         k[cc + 0] ^= k[cc - 4];
676         k[cc + 1] ^= k[cc - 3];
677         k[cc + 2] ^= k[cc - 2];
678         k[cc + 3] ^= k[cc - 1];
679     }
680 }
681 
682 /*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
683 
/* Encrypt one block with on-the-fly 128-bit keying (no stored schedule).
   key is the cipher key; o_key is working storage that is advanced in
   place each round and holds the final round key on return (pass key
   itself to destroy it, or a scratch buffer to preserve it).           */
void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                     const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{   uint_8t s1[N_BLOCK], r, rc = 1;

    if(o_key != key)
        block_copy( o_key, key );
    copy_and_key( s1, in, o_key );          /* initial AddRoundKey */

    for( r = 1 ; r < 10 ; ++r )             /* rounds 1..9 */
#if defined( VERSION_1 )
    {
        mix_sub_columns( s1 );
        update_encrypt_key_128( o_key, &rc );
        add_round_key( s1, o_key );
    }
#else
    {   uint_8t s2[N_BLOCK];
        mix_sub_columns( s2, s1 );
        update_encrypt_key_128( o_key, &rc );
        copy_and_key( s1, s2, o_key );
    }
#endif

    /* final round: no MixColumns */
    shift_sub_rows( s1 );
    update_encrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
711 
712 #endif
713 
714 #if defined( AES_DEC_128_OTFK )
715 
716 /*  The 'on the fly' decryption key update for for 128 bit keys */
717 
718 static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
719 {   uint_8t cc;
720 
721     for( cc = 12; cc > 0; cc -= 4 )
722     {
723         k[cc + 0] ^= k[cc - 4];
724         k[cc + 1] ^= k[cc - 3];
725         k[cc + 2] ^= k[cc - 2];
726         k[cc + 3] ^= k[cc - 1];
727     }
728     *rc = d2(*rc);
729     k[0] ^= s_box(k[13]) ^ *rc;
730     k[1] ^= s_box(k[14]);
731     k[2] ^= s_box(k[15]);
732     k[3] ^= s_box(k[12]);
733 }
734 
735 /*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
736 
/* Decrypt one block with on-the-fly 128-bit keying.  key must be the
   FINAL round key (as left in o_key by aes_encrypt_128); o_key is
   stepped backwards in place each round.  rc starts at 0x6c because
   d2(0x6c) = 0x36, the round-10 constant, so the first key step
   produces the correct value.                                          */
void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 0x6c;
    if(o_key != key)
        block_copy( o_key, key );

    /* undo the final round first */
    copy_and_key( s1, in, o_key );
    inv_shift_sub_rows( s1 );

    for( r = 10 ; --r ; )                   /* rounds 9..1, in reverse */
#if defined( VERSION_1 )
    {
        update_decrypt_key_128( o_key, &rc );
        add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint_8t s2[N_BLOCK];
        update_decrypt_key_128( o_key, &rc );
        copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    /* undo the initial AddRoundKey with the recovered cipher key */
    update_decrypt_key_128( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
764 
765 #endif
766 
767 #if defined( AES_ENC_256_OTFK )
768 
769 /*  The 'on the fly' encryption key update for for 256 bit keys */
770 
771 static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
772 {   uint_8t cc;
773 
774     k[0] ^= s_box(k[29]) ^ *rc;
775     k[1] ^= s_box(k[30]);
776     k[2] ^= s_box(k[31]);
777     k[3] ^= s_box(k[28]);
778     *rc = f2( *rc );
779 
780     for(cc = 4; cc < 16; cc += 4)
781     {
782         k[cc + 0] ^= k[cc - 4];
783         k[cc + 1] ^= k[cc - 3];
784         k[cc + 2] ^= k[cc - 2];
785         k[cc + 3] ^= k[cc - 1];
786     }
787 
788     k[16] ^= s_box(k[12]);
789     k[17] ^= s_box(k[13]);
790     k[18] ^= s_box(k[14]);
791     k[19] ^= s_box(k[15]);
792 
793     for( cc = 20; cc < 32; cc += 4 )
794     {
795         k[cc + 0] ^= k[cc - 4];
796         k[cc + 1] ^= k[cc - 3];
797         k[cc + 2] ^= k[cc - 2];
798         k[cc + 3] ^= k[cc - 1];
799     }
800 }
801 
802 /*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
803 
/* Encrypt one block with on-the-fly 256-bit keying.  key is the 32 byte
   cipher key; o_key is 32 bytes of working storage advanced in place.
   Odd rounds use the second half of the current key material; even
   rounds first advance the whole 256-bit schedule.                     */
void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 1;
    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }
    copy_and_key( s1, in, o_key );          /* initial AddRoundKey */

    for( r = 1 ; r < 14 ; ++r )             /* rounds 1..13 */
#if defined( VERSION_1 )
    {
        mix_sub_columns(s1);
        if( r & 1 )
            add_round_key( s1, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key );
        }
    }
#else
    {   uint_8t s2[N_BLOCK];
        mix_sub_columns( s2, s1 );
        if( r & 1 )
            copy_and_key( s1, s2, o_key + 16 );
        else
        {
            update_encrypt_key_256( o_key, &rc );
            copy_and_key( s1, s2, o_key );
        }
    }
#endif

    /* final round: no MixColumns */
    shift_sub_rows( s1 );
    update_encrypt_key_256( o_key, &rc );
    copy_and_key( out, s1, o_key );
}
844 
845 #endif
846 
847 #if defined( AES_DEC_256_OTFK )
848 
849 /*  The 'on the fly' encryption key update for for 256 bit keys */
850 
851 static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
852 {   uint_8t cc;
853 
854     for(cc = 28; cc > 16; cc -= 4)
855     {
856         k[cc + 0] ^= k[cc - 4];
857         k[cc + 1] ^= k[cc - 3];
858         k[cc + 2] ^= k[cc - 2];
859         k[cc + 3] ^= k[cc - 1];
860     }
861 
862     k[16] ^= s_box(k[12]);
863     k[17] ^= s_box(k[13]);
864     k[18] ^= s_box(k[14]);
865     k[19] ^= s_box(k[15]);
866 
867     for(cc = 12; cc > 0; cc -= 4)
868     {
869         k[cc + 0] ^= k[cc - 4];
870         k[cc + 1] ^= k[cc - 3];
871         k[cc + 2] ^= k[cc - 2];
872         k[cc + 3] ^= k[cc - 1];
873     }
874 
875     *rc = d2(*rc);
876     k[0] ^= s_box(k[29]) ^ *rc;
877     k[1] ^= s_box(k[30]);
878     k[2] ^= s_box(k[31]);
879     k[3] ^= s_box(k[28]);
880 }
881 
882 /*  Decrypt a single block of 16 bytes with 'on the fly'
883     256 bit keying
884 */
/* Decrypt one block with on-the-fly 256-bit keying.  key must be the
   final 32 bytes of key material (as left in o_key by aes_encrypt_256);
   o_key is stepped backwards in place.  rc starts at 0x80 because
   d2(0x80) = 0x40, the last round constant used on the encrypt side.   */
void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
                      const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
{
    uint_8t s1[N_BLOCK], r, rc = 0x80;

    if(o_key != key)
    {
        block_copy( o_key, key );
        block_copy( o_key + 16, key + 16 );
    }

    /* undo the final round first */
    copy_and_key( s1, in, o_key );
    inv_shift_sub_rows( s1 );

    for( r = 14 ; --r ; )                   /* rounds 13..1, in reverse */
#if defined( VERSION_1 )
    {
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            add_round_key( s1, o_key + 16 );
        }
        else
            add_round_key( s1, o_key );
        inv_mix_sub_columns( s1 );
    }
#else
    {   uint_8t s2[N_BLOCK];
        if( ( r & 1 ) )
        {
            update_decrypt_key_256( o_key, &rc );
            copy_and_key( s2, s1, o_key + 16 );
        }
        else
            copy_and_key( s2, s1, o_key );
        inv_mix_sub_columns( s1, s2 );
    }
#endif
    /* undo the initial AddRoundKey */
    copy_and_key( out, s1, o_key );
}
925 
926 #endif
927