1 /*
2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4
5 LICENSE TERMS
6
7 The redistribution and use of this software (with or without changes)
8 is allowed without the payment of fees or royalties provided that:
9
10 1. source code distributions include the above copyright notice, this
11 list of conditions and the following disclaimer;
12
13 2. binary distributions include the above copyright notice, this list
14 of conditions and the following disclaimer in their documentation;
15
16 3. the name of the copyright holder is not used to endorse products
17 built using this software without specific written permission.
18
19 DISCLAIMER
20
21 This software is provided 'as is' with no explicit or implied warranties
22 in respect of its properties, including, but not limited to, correctness
23 and/or fitness for purpose.
24 ---------------------------------------------------------------------------
25 Issue 09/09/2006
26
27 This is an AES implementation that uses only 8-bit byte operations on the
28 cipher state (there are options to use 32-bit types if available).
29
30 The combination of mix columns and byte substitution used here is based on
31 that developed by Karl Malbrain. His contribution is acknowledged.
32 */
33
34 /* define if you have a fast memcpy function on your system */
35 #if 1
36 #define HAVE_MEMCPY
37 #include <string.h>
38 #if 0
39 #if defined(_MSC_VER)
40 #include <intrin.h>
41 #pragma intrinsic(memcpy)
42 #endif
43 #endif
44 #endif
45
46 #include <stdint.h>
47 #include <stdlib.h>
48
49 /* define if you have fast 32-bit types on your system */
50 #if 1
51 #define HAVE_UINT_32T
52 #endif
53
/* define if you want to use pre-computed tables */
55 #if 1
56 #define USE_TABLES
57 #endif
58
59 /* On Intel Core 2 duo VERSION_1 is faster */
60
61 /* alternative versions (test for performance on your system) */
62 #if 1
63 #define VERSION_1
64 #endif
65
66 #include "aes.h"
67
68 #if defined(HAVE_UINT_32T)
69 typedef uint32_t uint_32t;
70 #endif
71
72 /* functions for finite field multiplication in the AES Galois field */
73
74 #define WPOLY 0x011b
75 #define BPOLY 0x1b
76 #define DPOLY 0x008d
77
78 #define f1(x) (x)
79 #define f2(x) (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
80 #define f4(x) \
81 (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
82 #define f8(x) \
83 (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) ^ \
84 ((((x) >> 5) & 4) * WPOLY))
85 #define d2(x) (((x) >> 1) ^ ((x)&1 ? DPOLY : 0))
86
87 #define f3(x) (f2(x) ^ (x))
88 #define f9(x) (f8(x) ^ (x))
89 #define fb(x) (f8(x) ^ f2(x) ^ (x))
90 #define fd(x) (f8(x) ^ f4(x) ^ (x))
91 #define fe(x) (f8(x) ^ f4(x) ^ f2(x))
92
93 #if defined(USE_TABLES)
94
95 #define sb_data(w) \
96 { /* S Box data values */ \
97 w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5), \
98 w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), \
99 w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), \
100 w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), \
101 w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), w(0x26), \
102 w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), w(0xe5), \
103 w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), w(0x04), w(0xc7), \
104 w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), w(0x07), \
105 w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75), \
106 w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), \
107 w(0xa0), w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), \
108 w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), \
109 w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), w(0x39), \
110 w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), w(0xaa), \
111 w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), w(0xf9), \
112 w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8), w(0x51), \
113 w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5), \
114 w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), \
115 w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), \
116 w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), \
117 w(0x5d), w(0x19), w(0x73), w(0x60), w(0x81), w(0x4f), w(0xdc), \
118 w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), w(0xb8), \
119 w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), w(0x32), \
120 w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), w(0xc2), \
121 w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79), \
122 w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), \
123 w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), \
124 w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), \
125 w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), w(0x1f), \
126 w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), w(0xb5), \
127 w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), w(0x35), \
128 w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), w(0xe1), \
129 w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94), \
130 w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), \
131 w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), \
132 w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), \
133 w(0x54), w(0xbb), w(0x16) \
134 }
135
136 #define isb_data(w) \
137 { /* inverse S Box data values */ \
138 w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38), \
139 w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), \
140 w(0xfb), w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), \
141 w(0xff), w(0x87), w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), \
142 w(0xde), w(0xe9), w(0xcb), w(0x54), w(0x7b), w(0x94), w(0x32), \
143 w(0xa6), w(0xc2), w(0x23), w(0x3d), w(0xee), w(0x4c), w(0x95), \
144 w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e), w(0x08), w(0x2e), \
145 w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2), w(0x76), \
146 w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25), \
147 w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), \
148 w(0x16), w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), \
149 w(0xb6), w(0x92), w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), \
150 w(0xed), w(0xb9), w(0xda), w(0x5e), w(0x15), w(0x46), w(0x57), \
151 w(0xa7), w(0x8d), w(0x9d), w(0x84), w(0x90), w(0xd8), w(0xab), \
152 w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a), w(0xf7), w(0xe4), \
153 w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06), w(0xd0), \
154 w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02), \
155 w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), \
156 w(0x6b), w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), \
157 w(0xdc), w(0xea), w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), \
158 w(0xb4), w(0xe6), w(0x73), w(0x96), w(0xac), w(0x74), w(0x22), \
159 w(0xe7), w(0xad), w(0x35), w(0x85), w(0xe2), w(0xf9), w(0x37), \
160 w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e), w(0x47), w(0xf1), \
161 w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89), w(0x6f), \
162 w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b), \
163 w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), \
164 w(0x20), w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), \
165 w(0x5a), w(0xf4), w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), \
166 w(0x07), w(0xc7), w(0x31), w(0xb1), w(0x12), w(0x10), w(0x59), \
167 w(0x27), w(0x80), w(0xec), w(0x5f), w(0x60), w(0x51), w(0x7f), \
168 w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d), w(0x2d), w(0xe5), \
169 w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef), w(0xa0), \
170 w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0), \
171 w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), \
172 w(0x61), w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), \
173 w(0xd6), w(0x26), w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), \
174 w(0x21), w(0x0c), w(0x7d) \
175 }
176
177 #define mm_data(w) \
178 { /* basic data for forming finite field tables */ \
179 w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07), \
180 w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), \
181 w(0x0f), w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), \
182 w(0x16), w(0x17), w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), \
183 w(0x1d), w(0x1e), w(0x1f), w(0x20), w(0x21), w(0x22), w(0x23), \
184 w(0x24), w(0x25), w(0x26), w(0x27), w(0x28), w(0x29), w(0x2a), \
185 w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f), w(0x30), w(0x31), \
186 w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37), w(0x38), \
187 w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f), \
188 w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), \
189 w(0x47), w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), \
190 w(0x4e), w(0x4f), w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), \
191 w(0x55), w(0x56), w(0x57), w(0x58), w(0x59), w(0x5a), w(0x5b), \
192 w(0x5c), w(0x5d), w(0x5e), w(0x5f), w(0x60), w(0x61), w(0x62), \
193 w(0x63), w(0x64), w(0x65), w(0x66), w(0x67), w(0x68), w(0x69), \
194 w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f), w(0x70), \
195 w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77), \
196 w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), \
197 w(0x7f), w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), \
198 w(0x86), w(0x87), w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), \
199 w(0x8d), w(0x8e), w(0x8f), w(0x90), w(0x91), w(0x92), w(0x93), \
200 w(0x94), w(0x95), w(0x96), w(0x97), w(0x98), w(0x99), w(0x9a), \
201 w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f), w(0xa0), w(0xa1), \
202 w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7), w(0xa8), \
203 w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf), \
204 w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), \
205 w(0xb7), w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), \
206 w(0xbe), w(0xbf), w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), \
207 w(0xc5), w(0xc6), w(0xc7), w(0xc8), w(0xc9), w(0xca), w(0xcb), \
208 w(0xcc), w(0xcd), w(0xce), w(0xcf), w(0xd0), w(0xd1), w(0xd2), \
209 w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7), w(0xd8), w(0xd9), \
210 w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf), w(0xe0), \
211 w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7), \
212 w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), \
213 w(0xef), w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), \
214 w(0xf6), w(0xf7), w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), \
215 w(0xfd), w(0xfe), w(0xff) \
216 }
217
218 static const uint_8t sbox[256] = sb_data(f1);
219 static const uint_8t isbox[256] = isb_data(f1);
220
221 static const uint_8t gfm2_sbox[256] = sb_data(f2);
222 static const uint_8t gfm3_sbox[256] = sb_data(f3);
223
224 static const uint_8t gfmul_9[256] = mm_data(f9);
225 static const uint_8t gfmul_b[256] = mm_data(fb);
226 static const uint_8t gfmul_d[256] = mm_data(fd);
227 static const uint_8t gfmul_e[256] = mm_data(fe);
228
229 #define s_box(x) sbox[(x)]
230 #define is_box(x) isbox[(x)]
231 #define gfm2_sb(x) gfm2_sbox[(x)]
232 #define gfm3_sb(x) gfm3_sbox[(x)]
233 #define gfm_9(x) gfmul_9[(x)]
234 #define gfm_b(x) gfmul_b[(x)]
235 #define gfm_d(x) gfmul_d[(x)]
236 #define gfm_e(x) gfmul_e[(x)]
237
238 #else
239
240 /* this is the high bit of x right shifted by 1 */
241 /* position. Since the starting polynomial has */
242 /* 9 bits (0x11b), this right shift keeps the */
243 /* values of all top bits within a byte */
244
hibit(const uint_8t x)245 static uint_8t hibit(const uint_8t x) {
246 uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
247
248 r |= (r >> 2);
249 r |= (r >> 4);
250 return (r + 1) >> 1;
251 }
252
253 /* return the inverse of the finite field element x */
254
gf_inv(const uint_8t x)255 static uint_8t gf_inv(const uint_8t x) {
256 uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
257
258 if (x < 2) return x;
259
260 for (;;) {
261 if (n1)
262 while (n2 >= n1) /* divide polynomial p2 by p1 */
263 {
264 n2 /= n1; /* shift smaller polynomial left */
265 p2 ^= (p1 * n2) & 0xff; /* and remove from larger one */
266 v2 ^= (v1 * n2); /* shift accumulated value and */
267 n2 = hibit(p2); /* add into result */
268 }
269 else
270 return v1;
271
272 if (n2) /* repeat with values swapped */
273 while (n1 >= n2) {
274 n1 /= n2;
275 p1 ^= p2 * n1;
276 v1 ^= v2 * n1;
277 n1 = hibit(p1);
278 }
279 else
280 return v2;
281 }
282 }
283
284 /* The forward and inverse affine transformations used in the S-box */
fwd_affine(const uint_8t x)285 uint_8t fwd_affine(const uint_8t x) {
286 #if defined(HAVE_UINT_32T)
287 uint_32t w = x;
288 w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
289 return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
290 #else
291 return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4) ^ (x >> 7) ^
292 (x >> 6) ^ (x >> 5) ^ (x >> 4);
293 #endif
294 }
295
inv_affine(const uint_8t x)296 uint_8t inv_affine(const uint_8t x) {
297 #if defined(HAVE_UINT_32T)
298 uint_32t w = x;
299 w = (w << 1) ^ (w << 3) ^ (w << 6);
300 return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
301 #else
302 return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6) ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
303 #endif
304 }
305
306 #define s_box(x) fwd_affine(gf_inv(x))
307 #define is_box(x) gf_inv(inv_affine(x))
308 #define gfm2_sb(x) f2(s_box(x))
309 #define gfm3_sb(x) f3(s_box(x))
310 #define gfm_9(x) f9(x)
311 #define gfm_b(x) fb(x)
312 #define gfm_d(x) fd(x)
313 #define gfm_e(x) fe(x)
314
315 #endif
316
317 #if defined(HAVE_MEMCPY)
318 #define block_copy_nn(d, s, l) memcpy(d, s, l)
319 #define block_copy(d, s) memcpy(d, s, N_BLOCK)
320 #else
321 #define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
322 #define block_copy(d, s) copy_block(d, s)
323 #endif
324
325 #if !defined(HAVE_MEMCPY)
copy_block(void * d,const void * s)326 static void copy_block(void* d, const void* s) {
327 #if defined(HAVE_UINT_32T)
328 ((uint_32t*)d)[0] = ((uint_32t*)s)[0];
329 ((uint_32t*)d)[1] = ((uint_32t*)s)[1];
330 ((uint_32t*)d)[2] = ((uint_32t*)s)[2];
331 ((uint_32t*)d)[3] = ((uint_32t*)s)[3];
332 #else
333 ((uint_8t*)d)[0] = ((uint_8t*)s)[0];
334 ((uint_8t*)d)[1] = ((uint_8t*)s)[1];
335 ((uint_8t*)d)[2] = ((uint_8t*)s)[2];
336 ((uint_8t*)d)[3] = ((uint_8t*)s)[3];
337 ((uint_8t*)d)[4] = ((uint_8t*)s)[4];
338 ((uint_8t*)d)[5] = ((uint_8t*)s)[5];
339 ((uint_8t*)d)[6] = ((uint_8t*)s)[6];
340 ((uint_8t*)d)[7] = ((uint_8t*)s)[7];
341 ((uint_8t*)d)[8] = ((uint_8t*)s)[8];
342 ((uint_8t*)d)[9] = ((uint_8t*)s)[9];
343 ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
344 ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
345 ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
346 ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
347 ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
348 ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
349 #endif
350 }
351
copy_block_nn(void * d,const void * s,uint_8t nn)352 static void copy_block_nn(void* d, const void* s, uint_8t nn) {
353 while (nn--) *((uint_8t*)d)++ = *((uint_8t*)s)++;
354 }
355 #endif
356
xor_block(void * d,const void * s)357 static void xor_block(void* d, const void* s) {
358 #if defined(HAVE_UINT_32T)
359 ((uint_32t*)d)[0] ^= ((uint_32t*)s)[0];
360 ((uint_32t*)d)[1] ^= ((uint_32t*)s)[1];
361 ((uint_32t*)d)[2] ^= ((uint_32t*)s)[2];
362 ((uint_32t*)d)[3] ^= ((uint_32t*)s)[3];
363 #else
364 ((uint_8t*)d)[0] ^= ((uint_8t*)s)[0];
365 ((uint_8t*)d)[1] ^= ((uint_8t*)s)[1];
366 ((uint_8t*)d)[2] ^= ((uint_8t*)s)[2];
367 ((uint_8t*)d)[3] ^= ((uint_8t*)s)[3];
368 ((uint_8t*)d)[4] ^= ((uint_8t*)s)[4];
369 ((uint_8t*)d)[5] ^= ((uint_8t*)s)[5];
370 ((uint_8t*)d)[6] ^= ((uint_8t*)s)[6];
371 ((uint_8t*)d)[7] ^= ((uint_8t*)s)[7];
372 ((uint_8t*)d)[8] ^= ((uint_8t*)s)[8];
373 ((uint_8t*)d)[9] ^= ((uint_8t*)s)[9];
374 ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
375 ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
376 ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
377 ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
378 ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
379 ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
380 #endif
381 }
382
copy_and_key(void * d,const void * s,const void * k)383 static void copy_and_key(void* d, const void* s, const void* k) {
384 #if defined(HAVE_UINT_32T)
385 ((uint_32t*)d)[0] = ((uint_32t*)s)[0] ^ ((uint_32t*)k)[0];
386 ((uint_32t*)d)[1] = ((uint_32t*)s)[1] ^ ((uint_32t*)k)[1];
387 ((uint_32t*)d)[2] = ((uint_32t*)s)[2] ^ ((uint_32t*)k)[2];
388 ((uint_32t*)d)[3] = ((uint_32t*)s)[3] ^ ((uint_32t*)k)[3];
389 #elif 1
390 ((uint_8t*)d)[0] = ((uint_8t*)s)[0] ^ ((uint_8t*)k)[0];
391 ((uint_8t*)d)[1] = ((uint_8t*)s)[1] ^ ((uint_8t*)k)[1];
392 ((uint_8t*)d)[2] = ((uint_8t*)s)[2] ^ ((uint_8t*)k)[2];
393 ((uint_8t*)d)[3] = ((uint_8t*)s)[3] ^ ((uint_8t*)k)[3];
394 ((uint_8t*)d)[4] = ((uint_8t*)s)[4] ^ ((uint_8t*)k)[4];
395 ((uint_8t*)d)[5] = ((uint_8t*)s)[5] ^ ((uint_8t*)k)[5];
396 ((uint_8t*)d)[6] = ((uint_8t*)s)[6] ^ ((uint_8t*)k)[6];
397 ((uint_8t*)d)[7] = ((uint_8t*)s)[7] ^ ((uint_8t*)k)[7];
398 ((uint_8t*)d)[8] = ((uint_8t*)s)[8] ^ ((uint_8t*)k)[8];
399 ((uint_8t*)d)[9] = ((uint_8t*)s)[9] ^ ((uint_8t*)k)[9];
400 ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
401 ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
402 ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
403 ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
404 ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
405 ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
406 #else
407 block_copy(d, s);
408 xor_block(d, k);
409 #endif
410 }
411
add_round_key(uint_8t d[N_BLOCK],const uint_8t k[N_BLOCK])412 static void add_round_key(uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK]) {
413 xor_block(d, k);
414 }
415
shift_sub_rows(uint_8t st[N_BLOCK])416 static void shift_sub_rows(uint_8t st[N_BLOCK]) {
417 uint_8t tt;
418
419 st[0] = s_box(st[0]);
420 st[4] = s_box(st[4]);
421 st[8] = s_box(st[8]);
422 st[12] = s_box(st[12]);
423
424 tt = st[1];
425 st[1] = s_box(st[5]);
426 st[5] = s_box(st[9]);
427 st[9] = s_box(st[13]);
428 st[13] = s_box(tt);
429
430 tt = st[2];
431 st[2] = s_box(st[10]);
432 st[10] = s_box(tt);
433 tt = st[6];
434 st[6] = s_box(st[14]);
435 st[14] = s_box(tt);
436
437 tt = st[15];
438 st[15] = s_box(st[11]);
439 st[11] = s_box(st[7]);
440 st[7] = s_box(st[3]);
441 st[3] = s_box(tt);
442 }
443
inv_shift_sub_rows(uint_8t st[N_BLOCK])444 static void inv_shift_sub_rows(uint_8t st[N_BLOCK]) {
445 uint_8t tt;
446
447 st[0] = is_box(st[0]);
448 st[4] = is_box(st[4]);
449 st[8] = is_box(st[8]);
450 st[12] = is_box(st[12]);
451
452 tt = st[13];
453 st[13] = is_box(st[9]);
454 st[9] = is_box(st[5]);
455 st[5] = is_box(st[1]);
456 st[1] = is_box(tt);
457
458 tt = st[2];
459 st[2] = is_box(st[10]);
460 st[10] = is_box(tt);
461 tt = st[6];
462 st[6] = is_box(st[14]);
463 st[14] = is_box(tt);
464
465 tt = st[3];
466 st[3] = is_box(st[7]);
467 st[7] = is_box(st[11]);
468 st[11] = is_box(st[15]);
469 st[15] = is_box(tt);
470 }
471
472 #if defined(VERSION_1)
mix_sub_columns(uint_8t dt[N_BLOCK])473 static void mix_sub_columns(uint_8t dt[N_BLOCK]) {
474 uint_8t st[N_BLOCK];
475 block_copy(st, dt);
476 #else
477 static void mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
478 #endif
479 dt[0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
480 dt[1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
481 dt[2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
482 dt[3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);
483
484 dt[4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
485 dt[5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
486 dt[6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
487 dt[7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);
488
489 dt[8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
490 dt[9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
491 dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
492 dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);
493
494 dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
495 dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
496 dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
497 dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
498 }
499
500 #if defined(VERSION_1)
501 static void inv_mix_sub_columns(uint_8t dt[N_BLOCK]) {
502 uint_8t st[N_BLOCK];
503 block_copy(st, dt);
504 #else
505 static void inv_mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
506 #endif
507 dt[0] = is_box(gfm_e(st[0]) ^ gfm_b(st[1]) ^ gfm_d(st[2]) ^ gfm_9(st[3]));
508 dt[5] = is_box(gfm_9(st[0]) ^ gfm_e(st[1]) ^ gfm_b(st[2]) ^ gfm_d(st[3]));
509 dt[10] = is_box(gfm_d(st[0]) ^ gfm_9(st[1]) ^ gfm_e(st[2]) ^ gfm_b(st[3]));
510 dt[15] = is_box(gfm_b(st[0]) ^ gfm_d(st[1]) ^ gfm_9(st[2]) ^ gfm_e(st[3]));
511
512 dt[4] = is_box(gfm_e(st[4]) ^ gfm_b(st[5]) ^ gfm_d(st[6]) ^ gfm_9(st[7]));
513 dt[9] = is_box(gfm_9(st[4]) ^ gfm_e(st[5]) ^ gfm_b(st[6]) ^ gfm_d(st[7]));
514 dt[14] = is_box(gfm_d(st[4]) ^ gfm_9(st[5]) ^ gfm_e(st[6]) ^ gfm_b(st[7]));
515 dt[3] = is_box(gfm_b(st[4]) ^ gfm_d(st[5]) ^ gfm_9(st[6]) ^ gfm_e(st[7]));
516
517 dt[8] = is_box(gfm_e(st[8]) ^ gfm_b(st[9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
518 dt[13] = is_box(gfm_9(st[8]) ^ gfm_e(st[9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
519 dt[2] = is_box(gfm_d(st[8]) ^ gfm_9(st[9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
520 dt[7] = is_box(gfm_b(st[8]) ^ gfm_d(st[9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));
521
522 dt[12] =
523 is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
524 dt[1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
525 dt[6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
526 dt[11] =
527 is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
528 }
529
530 #if defined(AES_ENC_PREKEYED) || defined(AES_DEC_PREKEYED)
531
532 /* Set the cipher key for the pre-keyed version */
/* NOTE: If the length_type used for the key length is an
   unsigned 8-bit character, a key length of 256 bits cannot be
   expressed as a bit count and must be entered as a length in
   bytes (valid inputs are then 16, 24, 32, 128 and 192).
*/
538
539 return_type aes_set_key(const unsigned char key[], length_type keylen,
540 aes_context ctx[1]) {
541 uint_8t cc, rc, hi;
542
543 switch (keylen) {
544 case 16:
545 case 128: /* length in bits (128 = 8*16) */
546 keylen = 16;
547 break;
548 case 24:
549 case 192: /* length in bits (192 = 8*24) */
550 keylen = 24;
551 break;
552 case 32:
553 /* case 256: length in bits (256 = 8*32) */
554 keylen = 32;
555 break;
556 default:
557 ctx->rnd = 0;
558 return (return_type)-1;
559 }
560 block_copy_nn(ctx->ksch, key, keylen);
561 hi = (keylen + 28) << 2;
562 ctx->rnd = (hi >> 4) - 1;
563 for (cc = keylen, rc = 1; cc < hi; cc += 4) {
564 uint_8t tt, t0, t1, t2, t3;
565
566 t0 = ctx->ksch[cc - 4];
567 t1 = ctx->ksch[cc - 3];
568 t2 = ctx->ksch[cc - 2];
569 t3 = ctx->ksch[cc - 1];
570 if (cc % keylen == 0) {
571 tt = t0;
572 t0 = s_box(t1) ^ rc;
573 t1 = s_box(t2);
574 t2 = s_box(t3);
575 t3 = s_box(tt);
576 rc = f2(rc);
577 } else if (keylen > 24 && cc % keylen == 16) {
578 t0 = s_box(t0);
579 t1 = s_box(t1);
580 t2 = s_box(t2);
581 t3 = s_box(t3);
582 }
583 tt = cc - keylen;
584 ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
585 ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
586 ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
587 ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
588 }
589 return 0;
590 }
591
592 #endif
593
594 #if defined(AES_ENC_PREKEYED)
595
596 /* Encrypt a single block of 16 bytes */
597
598 return_type aes_encrypt(const unsigned char in[N_BLOCK],
599 unsigned char out[N_BLOCK], const aes_context ctx[1]) {
600 if (ctx->rnd) {
601 uint_8t s1[N_BLOCK], r;
602 copy_and_key(s1, in, ctx->ksch);
603
604 for (r = 1; r < ctx->rnd; ++r)
605 #if defined(VERSION_1)
606 {
607 mix_sub_columns(s1);
608 add_round_key(s1, ctx->ksch + r * N_BLOCK);
609 }
610 #else
611 {
612 uint_8t s2[N_BLOCK];
613 mix_sub_columns(s2, s1);
614 copy_and_key(s1, s2, ctx->ksch + r * N_BLOCK);
615 }
616 #endif
617 shift_sub_rows(s1);
618 copy_and_key(out, s1, ctx->ksch + r * N_BLOCK);
619 } else
620 return (return_type)-1;
621 return 0;
622 }
623
624 /* CBC encrypt a number of blocks (input and return an IV) */
625
626 return_type aes_cbc_encrypt(const unsigned char* in, unsigned char* out,
627 int n_block, unsigned char iv[N_BLOCK],
628 const aes_context ctx[1]) {
629 while (n_block--) {
630 xor_block(iv, in);
631 if (aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
632 memcpy(out, iv, N_BLOCK);
633 in += N_BLOCK;
634 out += N_BLOCK;
635 }
636 return EXIT_SUCCESS;
637 }
638
639 #endif
640
641 #if defined(AES_DEC_PREKEYED)
642
643 /* Decrypt a single block of 16 bytes */
644
645 return_type aes_decrypt(const unsigned char in[N_BLOCK],
646 unsigned char out[N_BLOCK], const aes_context ctx[1]) {
647 if (ctx->rnd) {
648 uint_8t s1[N_BLOCK], r;
649 copy_and_key(s1, in, ctx->ksch + ctx->rnd * N_BLOCK);
650 inv_shift_sub_rows(s1);
651
652 for (r = ctx->rnd; --r;)
653 #if defined(VERSION_1)
654 {
655 add_round_key(s1, ctx->ksch + r * N_BLOCK);
656 inv_mix_sub_columns(s1);
657 }
658 #else
659 {
660 uint_8t s2[N_BLOCK];
661 copy_and_key(s2, s1, ctx->ksch + r * N_BLOCK);
662 inv_mix_sub_columns(s1, s2);
663 }
664 #endif
665 copy_and_key(out, s1, ctx->ksch);
666 } else
667 return (return_type)-1;
668 return 0;
669 }
670
671 /* CBC decrypt a number of blocks (input and return an IV) */
672
673 return_type aes_cbc_decrypt(const unsigned char* in, unsigned char* out,
674 int n_block, unsigned char iv[N_BLOCK],
675 const aes_context ctx[1]) {
676 while (n_block--) {
677 uint_8t tmp[N_BLOCK];
678
679 memcpy(tmp, in, N_BLOCK);
680 if (aes_decrypt(in, out, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
681 xor_block(out, iv);
682 memcpy(iv, tmp, N_BLOCK);
683 in += N_BLOCK;
684 out += N_BLOCK;
685 }
686 return EXIT_SUCCESS;
687 }
688
689 #endif
690
691 #if defined(AES_ENC_128_OTFK)
692
/* The 'on the fly' encryption key update for 128 bit keys */
694
695 static void update_encrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
696 uint_8t cc;
697
698 k[0] ^= s_box(k[13]) ^ *rc;
699 k[1] ^= s_box(k[14]);
700 k[2] ^= s_box(k[15]);
701 k[3] ^= s_box(k[12]);
702 *rc = f2(*rc);
703
704 for (cc = 4; cc < 16; cc += 4) {
705 k[cc + 0] ^= k[cc - 4];
706 k[cc + 1] ^= k[cc - 3];
707 k[cc + 2] ^= k[cc - 2];
708 k[cc + 3] ^= k[cc - 1];
709 }
710 }
711
712 /* Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
713
714 void aes_encrypt_128(const unsigned char in[N_BLOCK],
715 unsigned char out[N_BLOCK],
716 const unsigned char key[N_BLOCK],
717 unsigned char o_key[N_BLOCK]) {
718 uint_8t s1[N_BLOCK], r, rc = 1;
719
720 if (o_key != key) block_copy(o_key, key);
721 copy_and_key(s1, in, o_key);
722
723 for (r = 1; r < 10; ++r)
724 #if defined(VERSION_1)
725 {
726 mix_sub_columns(s1);
727 update_encrypt_key_128(o_key, &rc);
728 add_round_key(s1, o_key);
729 }
730 #else
731 {
732 uint_8t s2[N_BLOCK];
733 mix_sub_columns(s2, s1);
734 update_encrypt_key_128(o_key, &rc);
735 copy_and_key(s1, s2, o_key);
736 }
737 #endif
738
739 shift_sub_rows(s1);
740 update_encrypt_key_128(o_key, &rc);
741 copy_and_key(out, s1, o_key);
742 }
743
744 #endif
745
746 #if defined(AES_DEC_128_OTFK)
747
/* The 'on the fly' decryption key update for 128 bit keys */
749
750 static void update_decrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
751 uint_8t cc;
752
753 for (cc = 12; cc > 0; cc -= 4) {
754 k[cc + 0] ^= k[cc - 4];
755 k[cc + 1] ^= k[cc - 3];
756 k[cc + 2] ^= k[cc - 2];
757 k[cc + 3] ^= k[cc - 1];
758 }
759 *rc = d2(*rc);
760 k[0] ^= s_box(k[13]) ^ *rc;
761 k[1] ^= s_box(k[14]);
762 k[2] ^= s_box(k[15]);
763 k[3] ^= s_box(k[12]);
764 }
765
766 /* Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
767
768 void aes_decrypt_128(const unsigned char in[N_BLOCK],
769 unsigned char out[N_BLOCK],
770 const unsigned char key[N_BLOCK],
771 unsigned char o_key[N_BLOCK]) {
772 uint_8t s1[N_BLOCK], r, rc = 0x6c;
773 if (o_key != key) block_copy(o_key, key);
774
775 copy_and_key(s1, in, o_key);
776 inv_shift_sub_rows(s1);
777
778 for (r = 10; --r;)
779 #if defined(VERSION_1)
780 {
781 update_decrypt_key_128(o_key, &rc);
782 add_round_key(s1, o_key);
783 inv_mix_sub_columns(s1);
784 }
785 #else
786 {
787 uint_8t s2[N_BLOCK];
788 update_decrypt_key_128(o_key, &rc);
789 copy_and_key(s2, s1, o_key);
790 inv_mix_sub_columns(s1, s2);
791 }
792 #endif
793 update_decrypt_key_128(o_key, &rc);
794 copy_and_key(out, s1, o_key);
795 }
796
797 #endif
798
799 #if defined(AES_ENC_256_OTFK)
800
/* The 'on the fly' encryption key update for 256 bit keys */
802
803 static void update_encrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
804 uint_8t cc;
805
806 k[0] ^= s_box(k[29]) ^ *rc;
807 k[1] ^= s_box(k[30]);
808 k[2] ^= s_box(k[31]);
809 k[3] ^= s_box(k[28]);
810 *rc = f2(*rc);
811
812 for (cc = 4; cc < 16; cc += 4) {
813 k[cc + 0] ^= k[cc - 4];
814 k[cc + 1] ^= k[cc - 3];
815 k[cc + 2] ^= k[cc - 2];
816 k[cc + 3] ^= k[cc - 1];
817 }
818
819 k[16] ^= s_box(k[12]);
820 k[17] ^= s_box(k[13]);
821 k[18] ^= s_box(k[14]);
822 k[19] ^= s_box(k[15]);
823
824 for (cc = 20; cc < 32; cc += 4) {
825 k[cc + 0] ^= k[cc - 4];
826 k[cc + 1] ^= k[cc - 3];
827 k[cc + 2] ^= k[cc - 2];
828 k[cc + 3] ^= k[cc - 1];
829 }
830 }
831
832 /* Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
833
834 void aes_encrypt_256(const unsigned char in[N_BLOCK],
835 unsigned char out[N_BLOCK],
836 const unsigned char key[2 * N_BLOCK],
837 unsigned char o_key[2 * N_BLOCK]) {
838 uint_8t s1[N_BLOCK], r, rc = 1;
839 if (o_key != key) {
840 block_copy(o_key, key);
841 block_copy(o_key + 16, key + 16);
842 }
843 copy_and_key(s1, in, o_key);
844
845 for (r = 1; r < 14; ++r)
846 #if defined(VERSION_1)
847 {
848 mix_sub_columns(s1);
849 if (r & 1)
850 add_round_key(s1, o_key + 16);
851 else {
852 update_encrypt_key_256(o_key, &rc);
853 add_round_key(s1, o_key);
854 }
855 }
856 #else
857 {
858 uint_8t s2[N_BLOCK];
859 mix_sub_columns(s2, s1);
860 if (r & 1)
861 copy_and_key(s1, s2, o_key + 16);
862 else {
863 update_encrypt_key_256(o_key, &rc);
864 copy_and_key(s1, s2, o_key);
865 }
866 }
867 #endif
868
869 shift_sub_rows(s1);
870 update_encrypt_key_256(o_key, &rc);
871 copy_and_key(out, s1, o_key);
872 }
873
874 #endif
875
876 #if defined(AES_DEC_256_OTFK)
877
/* The 'on the fly' decryption key update for 256 bit keys */
879
880 static void update_decrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
881 uint_8t cc;
882
883 for (cc = 28; cc > 16; cc -= 4) {
884 k[cc + 0] ^= k[cc - 4];
885 k[cc + 1] ^= k[cc - 3];
886 k[cc + 2] ^= k[cc - 2];
887 k[cc + 3] ^= k[cc - 1];
888 }
889
890 k[16] ^= s_box(k[12]);
891 k[17] ^= s_box(k[13]);
892 k[18] ^= s_box(k[14]);
893 k[19] ^= s_box(k[15]);
894
895 for (cc = 12; cc > 0; cc -= 4) {
896 k[cc + 0] ^= k[cc - 4];
897 k[cc + 1] ^= k[cc - 3];
898 k[cc + 2] ^= k[cc - 2];
899 k[cc + 3] ^= k[cc - 1];
900 }
901
902 *rc = d2(*rc);
903 k[0] ^= s_box(k[29]) ^ *rc;
904 k[1] ^= s_box(k[30]);
905 k[2] ^= s_box(k[31]);
906 k[3] ^= s_box(k[28]);
907 }
908
909 /* Decrypt a single block of 16 bytes with 'on the fly'
910 256 bit keying
911 */
912 void aes_decrypt_256(const unsigned char in[N_BLOCK],
913 unsigned char out[N_BLOCK],
914 const unsigned char key[2 * N_BLOCK],
915 unsigned char o_key[2 * N_BLOCK]) {
916 uint_8t s1[N_BLOCK], r, rc = 0x80;
917
918 if (o_key != key) {
919 block_copy(o_key, key);
920 block_copy(o_key + 16, key + 16);
921 }
922
923 copy_and_key(s1, in, o_key);
924 inv_shift_sub_rows(s1);
925
926 for (r = 14; --r;)
927 #if defined(VERSION_1)
928 {
929 if ((r & 1)) {
930 update_decrypt_key_256(o_key, &rc);
931 add_round_key(s1, o_key + 16);
932 } else
933 add_round_key(s1, o_key);
934 inv_mix_sub_columns(s1);
935 }
936 #else
937 {
938 uint_8t s2[N_BLOCK];
939 if ((r & 1)) {
940 update_decrypt_key_256(o_key, &rc);
941 copy_and_key(s2, s1, o_key + 16);
942 } else
943 copy_and_key(s2, s1, o_key);
944 inv_mix_sub_columns(s1, s2);
945 }
946 #endif
947 copy_and_key(out, s1, o_key);
948 }
949
950 #endif
951