• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#! /usr/bin/env perl
# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for ARMv4

# January 2007.
#
# Code uses single 1K S-box and is >2 times faster than code generated
# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
# allows to merge logical or arithmetic operation with shift or rotate
# in one instruction and emit combined result every cycle. The module
# is endian-neutral. The performance is ~42 cycles/byte for 128-bit
# key [on single-issue Xscale PXA250 core].

# May 2007.
#
# AES_set_[en|de]crypt_key is added.

# July 2010.
#
# Rescheduling for dual-issue pipeline resulted in 12% improvement on
# Cortex A8 core and ~25 cycles per byte processed with 128-bit key.

# February 2011.
#
# Profiler-assisted and platform-specific optimization resulted in 16%
# improvement on Cortex A8 core and ~21.5 cycles per byte.

# Command line: [flavour] [...] output-file
#
# The first argument is the perlasm flavour unless it already looks like a
# file name (name.ext); in that case it is taken as the output file and no
# flavour is set.  Otherwise the remaining arguments are consumed until one
# looks like a file name or the argument list is exhausted.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # Directory part of this script's path (including the trailing
    # separator).  The capture is read only when the match succeeded, so a
    # stale $1 can never leak into the path; with no directory part the
    # translator is looked up relative to the current directory.
    $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Everything printed to STDOUT is piped through the translator.  The
    # interpreter, translator and output paths are quoted so that paths
    # containing spaces survive the shell, and a failed spawn is fatal
    # instead of silently discarding the generated assembly.
    my $cmd = "\"$^X\" \"$xlate\" $flavour";
    $cmd .= " \"$output\"" if (defined $output && length $output);
    open OUT,"| $cmd" or die "can't call $xlate: $!";
    *STDOUT=*OUT;
} elsif (defined $output && length $output) {
    # No translation requested: write straight to the named file.
    open OUT,">",$output or die "can't open $output: $!";
    *STDOUT=*OUT;
}
# With neither a flavour nor an output file, STDOUT is left untouched and
# the generated code goes to the caller's standard output.
58
# Symbolic names for the ARM registers used by the assembly templates below.

# $s0-$s3: the four 32-bit words of the AES state (and of the user key
# during key setup).
$s0="r0";
$s1="r1";
$s2="r2";
$s3="r3";
# $t1-$t3: scratch registers for table look-ups and round-key words.
$t1="r4";
$t2="r5";
$t3="r6";
# $i1-$i3: mainly byte indices extracted from the state words.
$i1="r7";
$i2="r8";
$i3="r9";

# $tbl: base address of the look-up table in the encrypt/decrypt and
# key-setup routines.
$tbl="r10";
# $key: pointer into the key schedule.
$key="r11";
# $rounds: round counter inside the core routines; the public entry points
# also use it to hold the input and output pointers around the core call.
$rounds="r12";
73
74$code=<<___;
75#ifndef __KERNEL__
76# include <openssl/arm_arch.h>
77#else
78# define __ARM_ARCH__ __LINUX_ARM_ARCH__
79#endif
80
81@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
82@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 AES
83@ instructions are in aesv8-armx.pl.)
84.arch  armv7-a
85
86.text
87#if defined(__thumb2__) && !defined(__APPLE__)
88.syntax	unified
89.thumb
90#else
91.code	32
92#undef __thumb2__
93#endif
94
95.type	AES_Te,%object
96.align	5
97AES_Te:
98.word	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
99.word	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
100.word	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
101.word	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
102.word	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
103.word	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
104.word	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
105.word	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
106.word	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
107.word	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
108.word	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
109.word	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
110.word	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
111.word	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
112.word	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
113.word	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
114.word	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
115.word	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
116.word	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
117.word	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
118.word	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
119.word	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
120.word	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
121.word	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
122.word	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
123.word	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
124.word	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
125.word	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
126.word	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
127.word	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
128.word	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
129.word	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
130.word	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
131.word	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
132.word	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
133.word	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
134.word	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
135.word	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
136.word	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
137.word	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
138.word	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
139.word	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
140.word	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
141.word	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
142.word	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
143.word	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
144.word	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
145.word	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
146.word	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
147.word	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
148.word	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
149.word	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
150.word	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
151.word	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
152.word	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
153.word	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
154.word	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
155.word	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
156.word	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
157.word	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
158.word	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
159.word	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
160.word	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
161.word	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
162@ Te4[256]
163.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
164.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
165.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
166.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
167.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
168.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
169.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
170.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
171.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
172.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
173.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
174.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
175.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
176.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
177.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
178.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
179.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
180.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
181.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
182.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
183.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
184.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
185.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
186.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
187.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
188.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
189.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
190.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
191.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
192.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
193.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
194.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
195@ rcon[]
196.word	0x01000000, 0x02000000, 0x04000000, 0x08000000
197.word	0x10000000, 0x20000000, 0x40000000, 0x80000000
198.word	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
199.size	AES_Te,.-AES_Te
200
201@ void aes_nohw_encrypt(const unsigned char *in, unsigned char *out,
202@ 		                  const AES_KEY *key) {
203.global aes_nohw_encrypt
204.type   aes_nohw_encrypt,%function
205.align	5
206aes_nohw_encrypt:
207#ifndef	__thumb2__
208	sub	r3,pc,#8		@ aes_nohw_encrypt
209#else
210	adr	r3,.
211#endif
212	stmdb   sp!,{r1,r4-r12,lr}
213#if defined(__thumb2__) || defined(__APPLE__)
214	adr	$tbl,AES_Te
215#else
216	sub	$tbl,r3,#aes_nohw_encrypt-AES_Te	@ Te
217#endif
218	mov	$rounds,r0		@ inp
219	mov	$key,r2
220#if __ARM_ARCH__<7
221	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
222	ldrb	$t1,[$rounds,#2]	@ manner...
223	ldrb	$t2,[$rounds,#1]
224	ldrb	$t3,[$rounds,#0]
225	orr	$s0,$s0,$t1,lsl#8
226	ldrb	$s1,[$rounds,#7]
227	orr	$s0,$s0,$t2,lsl#16
228	ldrb	$t1,[$rounds,#6]
229	orr	$s0,$s0,$t3,lsl#24
230	ldrb	$t2,[$rounds,#5]
231	ldrb	$t3,[$rounds,#4]
232	orr	$s1,$s1,$t1,lsl#8
233	ldrb	$s2,[$rounds,#11]
234	orr	$s1,$s1,$t2,lsl#16
235	ldrb	$t1,[$rounds,#10]
236	orr	$s1,$s1,$t3,lsl#24
237	ldrb	$t2,[$rounds,#9]
238	ldrb	$t3,[$rounds,#8]
239	orr	$s2,$s2,$t1,lsl#8
240	ldrb	$s3,[$rounds,#15]
241	orr	$s2,$s2,$t2,lsl#16
242	ldrb	$t1,[$rounds,#14]
243	orr	$s2,$s2,$t3,lsl#24
244	ldrb	$t2,[$rounds,#13]
245	ldrb	$t3,[$rounds,#12]
246	orr	$s3,$s3,$t1,lsl#8
247	orr	$s3,$s3,$t2,lsl#16
248	orr	$s3,$s3,$t3,lsl#24
249#else
250	ldr	$s0,[$rounds,#0]
251	ldr	$s1,[$rounds,#4]
252	ldr	$s2,[$rounds,#8]
253	ldr	$s3,[$rounds,#12]
254#ifdef __ARMEL__
255	rev	$s0,$s0
256	rev	$s1,$s1
257	rev	$s2,$s2
258	rev	$s3,$s3
259#endif
260#endif
261	bl	_armv4_AES_encrypt
262
263	ldr	$rounds,[sp],#4		@ pop out
264#if __ARM_ARCH__>=7
265#ifdef __ARMEL__
266	rev	$s0,$s0
267	rev	$s1,$s1
268	rev	$s2,$s2
269	rev	$s3,$s3
270#endif
271	str	$s0,[$rounds,#0]
272	str	$s1,[$rounds,#4]
273	str	$s2,[$rounds,#8]
274	str	$s3,[$rounds,#12]
275#else
276	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
277	mov	$t2,$s0,lsr#16		@ manner...
278	mov	$t3,$s0,lsr#8
279	strb	$t1,[$rounds,#0]
280	strb	$t2,[$rounds,#1]
281	mov	$t1,$s1,lsr#24
282	strb	$t3,[$rounds,#2]
283	mov	$t2,$s1,lsr#16
284	strb	$s0,[$rounds,#3]
285	mov	$t3,$s1,lsr#8
286	strb	$t1,[$rounds,#4]
287	strb	$t2,[$rounds,#5]
288	mov	$t1,$s2,lsr#24
289	strb	$t3,[$rounds,#6]
290	mov	$t2,$s2,lsr#16
291	strb	$s1,[$rounds,#7]
292	mov	$t3,$s2,lsr#8
293	strb	$t1,[$rounds,#8]
294	strb	$t2,[$rounds,#9]
295	mov	$t1,$s3,lsr#24
296	strb	$t3,[$rounds,#10]
297	mov	$t2,$s3,lsr#16
298	strb	$s2,[$rounds,#11]
299	mov	$t3,$s3,lsr#8
300	strb	$t1,[$rounds,#12]
301	strb	$t2,[$rounds,#13]
302	strb	$t3,[$rounds,#14]
303	strb	$s3,[$rounds,#15]
304#endif
305#if __ARM_ARCH__>=5
306	ldmia	sp!,{r4-r12,pc}
307#else
308	ldmia   sp!,{r4-r12,lr}
309	tst	lr,#1
310	moveq	pc,lr			@ be binary compatible with V4, yet
311	bx	lr			@ interoperable with Thumb ISA:-)
312#endif
313.size	aes_nohw_encrypt,.-aes_nohw_encrypt
314
315.type   _armv4_AES_encrypt,%function
316.align	2
317_armv4_AES_encrypt:
318	str	lr,[sp,#-4]!		@ push lr
319	ldmia	$key!,{$t1-$i1}
320	eor	$s0,$s0,$t1
321	ldr	$rounds,[$key,#240-16]
322	eor	$s1,$s1,$t2
323	eor	$s2,$s2,$t3
324	eor	$s3,$s3,$i1
325	sub	$rounds,$rounds,#1
326	mov	lr,#255
327
328	and	$i1,lr,$s0
329	and	$i2,lr,$s0,lsr#8
330	and	$i3,lr,$s0,lsr#16
331	mov	$s0,$s0,lsr#24
332.Lenc_loop:
333	ldr	$t1,[$tbl,$i1,lsl#2]	@ Te3[s0>>0]
334	and	$i1,lr,$s1,lsr#16	@ i0
335	ldr	$t2,[$tbl,$i2,lsl#2]	@ Te2[s0>>8]
336	and	$i2,lr,$s1
337	ldr	$t3,[$tbl,$i3,lsl#2]	@ Te1[s0>>16]
338	and	$i3,lr,$s1,lsr#8
339	ldr	$s0,[$tbl,$s0,lsl#2]	@ Te0[s0>>24]
340	mov	$s1,$s1,lsr#24
341
342	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te1[s1>>16]
343	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te3[s1>>0]
344	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te2[s1>>8]
345	eor	$s0,$s0,$i1,ror#8
346	ldr	$s1,[$tbl,$s1,lsl#2]	@ Te0[s1>>24]
347	and	$i1,lr,$s2,lsr#8	@ i0
348	eor	$t2,$t2,$i2,ror#8
349	and	$i2,lr,$s2,lsr#16	@ i1
350	eor	$t3,$t3,$i3,ror#8
351	and	$i3,lr,$s2
352	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te2[s2>>8]
353	eor	$s1,$s1,$t1,ror#24
354	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te1[s2>>16]
355	mov	$s2,$s2,lsr#24
356
357	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te3[s2>>0]
358	eor	$s0,$s0,$i1,ror#16
359	ldr	$s2,[$tbl,$s2,lsl#2]	@ Te0[s2>>24]
360	and	$i1,lr,$s3		@ i0
361	eor	$s1,$s1,$i2,ror#8
362	and	$i2,lr,$s3,lsr#8	@ i1
363	eor	$t3,$t3,$i3,ror#16
364	and	$i3,lr,$s3,lsr#16	@ i2
365	ldr	$i1,[$tbl,$i1,lsl#2]	@ Te3[s3>>0]
366	eor	$s2,$s2,$t2,ror#16
367	ldr	$i2,[$tbl,$i2,lsl#2]	@ Te2[s3>>8]
368	mov	$s3,$s3,lsr#24
369
370	ldr	$i3,[$tbl,$i3,lsl#2]	@ Te1[s3>>16]
371	eor	$s0,$s0,$i1,ror#24
372	ldr	$i1,[$key],#16
373	eor	$s1,$s1,$i2,ror#16
374	ldr	$s3,[$tbl,$s3,lsl#2]	@ Te0[s3>>24]
375	eor	$s2,$s2,$i3,ror#8
376	ldr	$t1,[$key,#-12]
377	eor	$s3,$s3,$t3,ror#8
378
379	ldr	$t2,[$key,#-8]
380	eor	$s0,$s0,$i1
381	ldr	$t3,[$key,#-4]
382	and	$i1,lr,$s0
383	eor	$s1,$s1,$t1
384	and	$i2,lr,$s0,lsr#8
385	eor	$s2,$s2,$t2
386	and	$i3,lr,$s0,lsr#16
387	eor	$s3,$s3,$t3
388	mov	$s0,$s0,lsr#24
389
390	subs	$rounds,$rounds,#1
391	bne	.Lenc_loop
392
393	add	$tbl,$tbl,#2
394
395	ldrb	$t1,[$tbl,$i1,lsl#2]	@ Te4[s0>>0]
396	and	$i1,lr,$s1,lsr#16	@ i0
397	ldrb	$t2,[$tbl,$i2,lsl#2]	@ Te4[s0>>8]
398	and	$i2,lr,$s1
399	ldrb	$t3,[$tbl,$i3,lsl#2]	@ Te4[s0>>16]
400	and	$i3,lr,$s1,lsr#8
401	ldrb	$s0,[$tbl,$s0,lsl#2]	@ Te4[s0>>24]
402	mov	$s1,$s1,lsr#24
403
404	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s1>>16]
405	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s1>>0]
406	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s1>>8]
407	eor	$s0,$i1,$s0,lsl#8
408	ldrb	$s1,[$tbl,$s1,lsl#2]	@ Te4[s1>>24]
409	and	$i1,lr,$s2,lsr#8	@ i0
410	eor	$t2,$i2,$t2,lsl#8
411	and	$i2,lr,$s2,lsr#16	@ i1
412	eor	$t3,$i3,$t3,lsl#8
413	and	$i3,lr,$s2
414	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s2>>8]
415	eor	$s1,$t1,$s1,lsl#24
416	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s2>>16]
417	mov	$s2,$s2,lsr#24
418
419	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s2>>0]
420	eor	$s0,$i1,$s0,lsl#8
421	ldrb	$s2,[$tbl,$s2,lsl#2]	@ Te4[s2>>24]
422	and	$i1,lr,$s3		@ i0
423	eor	$s1,$s1,$i2,lsl#16
424	and	$i2,lr,$s3,lsr#8	@ i1
425	eor	$t3,$i3,$t3,lsl#8
426	and	$i3,lr,$s3,lsr#16	@ i2
427	ldrb	$i1,[$tbl,$i1,lsl#2]	@ Te4[s3>>0]
428	eor	$s2,$t2,$s2,lsl#24
429	ldrb	$i2,[$tbl,$i2,lsl#2]	@ Te4[s3>>8]
430	mov	$s3,$s3,lsr#24
431
432	ldrb	$i3,[$tbl,$i3,lsl#2]	@ Te4[s3>>16]
433	eor	$s0,$i1,$s0,lsl#8
434	ldr	$i1,[$key,#0]
435	ldrb	$s3,[$tbl,$s3,lsl#2]	@ Te4[s3>>24]
436	eor	$s1,$s1,$i2,lsl#8
437	ldr	$t1,[$key,#4]
438	eor	$s2,$s2,$i3,lsl#16
439	ldr	$t2,[$key,#8]
440	eor	$s3,$t3,$s3,lsl#24
441	ldr	$t3,[$key,#12]
442
443	eor	$s0,$s0,$i1
444	eor	$s1,$s1,$t1
445	eor	$s2,$s2,$t2
446	eor	$s3,$s3,$t3
447
448	sub	$tbl,$tbl,#2
449	ldr	pc,[sp],#4		@ pop and return
450.size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
451
452.global aes_nohw_set_encrypt_key
453.type   aes_nohw_set_encrypt_key,%function
454.align	5
455aes_nohw_set_encrypt_key:
456_armv4_AES_set_encrypt_key:
457#ifndef	__thumb2__
458	sub	r3,pc,#8		@ aes_nohw_set_encrypt_key
459#else
460	adr	r3,.
461#endif
462	teq	r0,#0
463#ifdef	__thumb2__
464	itt	eq			@ Thumb2 thing, sanity check in ARM
465#endif
466	moveq	r0,#-1
467	beq	.Labrt
468	teq	r2,#0
469#ifdef	__thumb2__
470	itt	eq			@ Thumb2 thing, sanity check in ARM
471#endif
472	moveq	r0,#-1
473	beq	.Labrt
474
475	teq	r1,#128
476	beq	.Lok
477	teq	r1,#192
478	beq	.Lok
479	teq	r1,#256
480#ifdef	__thumb2__
481	itt	ne			@ Thumb2 thing, sanity check in ARM
482#endif
483	movne	r0,#-1
484	bne	.Labrt
485
486.Lok:	stmdb   sp!,{r4-r12,lr}
487	mov	$rounds,r0		@ inp
488	mov	lr,r1			@ bits
489	mov	$key,r2			@ key
490
491#if defined(__thumb2__) || defined(__APPLE__)
492	adr	$tbl,AES_Te+1024				@ Te4
493#else
494	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
495#endif
496
497#if __ARM_ARCH__<7
498	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
499	ldrb	$t1,[$rounds,#2]	@ manner...
500	ldrb	$t2,[$rounds,#1]
501	ldrb	$t3,[$rounds,#0]
502	orr	$s0,$s0,$t1,lsl#8
503	ldrb	$s1,[$rounds,#7]
504	orr	$s0,$s0,$t2,lsl#16
505	ldrb	$t1,[$rounds,#6]
506	orr	$s0,$s0,$t3,lsl#24
507	ldrb	$t2,[$rounds,#5]
508	ldrb	$t3,[$rounds,#4]
509	orr	$s1,$s1,$t1,lsl#8
510	ldrb	$s2,[$rounds,#11]
511	orr	$s1,$s1,$t2,lsl#16
512	ldrb	$t1,[$rounds,#10]
513	orr	$s1,$s1,$t3,lsl#24
514	ldrb	$t2,[$rounds,#9]
515	ldrb	$t3,[$rounds,#8]
516	orr	$s2,$s2,$t1,lsl#8
517	ldrb	$s3,[$rounds,#15]
518	orr	$s2,$s2,$t2,lsl#16
519	ldrb	$t1,[$rounds,#14]
520	orr	$s2,$s2,$t3,lsl#24
521	ldrb	$t2,[$rounds,#13]
522	ldrb	$t3,[$rounds,#12]
523	orr	$s3,$s3,$t1,lsl#8
524	str	$s0,[$key],#16
525	orr	$s3,$s3,$t2,lsl#16
526	str	$s1,[$key,#-12]
527	orr	$s3,$s3,$t3,lsl#24
528	str	$s2,[$key,#-8]
529	str	$s3,[$key,#-4]
530#else
531	ldr	$s0,[$rounds,#0]
532	ldr	$s1,[$rounds,#4]
533	ldr	$s2,[$rounds,#8]
534	ldr	$s3,[$rounds,#12]
535#ifdef __ARMEL__
536	rev	$s0,$s0
537	rev	$s1,$s1
538	rev	$s2,$s2
539	rev	$s3,$s3
540#endif
541	str	$s0,[$key],#16
542	str	$s1,[$key,#-12]
543	str	$s2,[$key,#-8]
544	str	$s3,[$key,#-4]
545#endif
546
547	teq	lr,#128
548	bne	.Lnot128
549	mov	$rounds,#10
550	str	$rounds,[$key,#240-16]
551	add	$t3,$tbl,#256			@ rcon
552	mov	lr,#255
553
554.L128_loop:
555	and	$t2,lr,$s3,lsr#24
556	and	$i1,lr,$s3,lsr#16
557	ldrb	$t2,[$tbl,$t2]
558	and	$i2,lr,$s3,lsr#8
559	ldrb	$i1,[$tbl,$i1]
560	and	$i3,lr,$s3
561	ldrb	$i2,[$tbl,$i2]
562	orr	$t2,$t2,$i1,lsl#24
563	ldrb	$i3,[$tbl,$i3]
564	orr	$t2,$t2,$i2,lsl#16
565	ldr	$t1,[$t3],#4			@ rcon[i++]
566	orr	$t2,$t2,$i3,lsl#8
567	eor	$t2,$t2,$t1
568	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
569	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
570	str	$s0,[$key],#16
571	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
572	str	$s1,[$key,#-12]
573	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
574	str	$s2,[$key,#-8]
575	subs	$rounds,$rounds,#1
576	str	$s3,[$key,#-4]
577	bne	.L128_loop
578	sub	r2,$key,#176
579	b	.Ldone
580
581.Lnot128:
582#if __ARM_ARCH__<7
583	ldrb	$i2,[$rounds,#19]
584	ldrb	$t1,[$rounds,#18]
585	ldrb	$t2,[$rounds,#17]
586	ldrb	$t3,[$rounds,#16]
587	orr	$i2,$i2,$t1,lsl#8
588	ldrb	$i3,[$rounds,#23]
589	orr	$i2,$i2,$t2,lsl#16
590	ldrb	$t1,[$rounds,#22]
591	orr	$i2,$i2,$t3,lsl#24
592	ldrb	$t2,[$rounds,#21]
593	ldrb	$t3,[$rounds,#20]
594	orr	$i3,$i3,$t1,lsl#8
595	orr	$i3,$i3,$t2,lsl#16
596	str	$i2,[$key],#8
597	orr	$i3,$i3,$t3,lsl#24
598	str	$i3,[$key,#-4]
599#else
600	ldr	$i2,[$rounds,#16]
601	ldr	$i3,[$rounds,#20]
602#ifdef __ARMEL__
603	rev	$i2,$i2
604	rev	$i3,$i3
605#endif
606	str	$i2,[$key],#8
607	str	$i3,[$key,#-4]
608#endif
609
610	teq	lr,#192
611	bne	.Lnot192
612	mov	$rounds,#12
613	str	$rounds,[$key,#240-24]
614	add	$t3,$tbl,#256			@ rcon
615	mov	lr,#255
616	mov	$rounds,#8
617
618.L192_loop:
619	and	$t2,lr,$i3,lsr#24
620	and	$i1,lr,$i3,lsr#16
621	ldrb	$t2,[$tbl,$t2]
622	and	$i2,lr,$i3,lsr#8
623	ldrb	$i1,[$tbl,$i1]
624	and	$i3,lr,$i3
625	ldrb	$i2,[$tbl,$i2]
626	orr	$t2,$t2,$i1,lsl#24
627	ldrb	$i3,[$tbl,$i3]
628	orr	$t2,$t2,$i2,lsl#16
629	ldr	$t1,[$t3],#4			@ rcon[i++]
630	orr	$t2,$t2,$i3,lsl#8
631	eor	$i3,$t2,$t1
632	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
633	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
634	str	$s0,[$key],#24
635	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
636	str	$s1,[$key,#-20]
637	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
638	str	$s2,[$key,#-16]
639	subs	$rounds,$rounds,#1
640	str	$s3,[$key,#-12]
641#ifdef	__thumb2__
642	itt	eq				@ Thumb2 thing, sanity check in ARM
643#endif
644	subeq	r2,$key,#216
645	beq	.Ldone
646
647	ldr	$i1,[$key,#-32]
648	ldr	$i2,[$key,#-28]
649	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
650	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
651	str	$i1,[$key,#-8]
652	str	$i3,[$key,#-4]
653	b	.L192_loop
654
655.Lnot192:
656#if __ARM_ARCH__<7
657	ldrb	$i2,[$rounds,#27]
658	ldrb	$t1,[$rounds,#26]
659	ldrb	$t2,[$rounds,#25]
660	ldrb	$t3,[$rounds,#24]
661	orr	$i2,$i2,$t1,lsl#8
662	ldrb	$i3,[$rounds,#31]
663	orr	$i2,$i2,$t2,lsl#16
664	ldrb	$t1,[$rounds,#30]
665	orr	$i2,$i2,$t3,lsl#24
666	ldrb	$t2,[$rounds,#29]
667	ldrb	$t3,[$rounds,#28]
668	orr	$i3,$i3,$t1,lsl#8
669	orr	$i3,$i3,$t2,lsl#16
670	str	$i2,[$key],#8
671	orr	$i3,$i3,$t3,lsl#24
672	str	$i3,[$key,#-4]
673#else
674	ldr	$i2,[$rounds,#24]
675	ldr	$i3,[$rounds,#28]
676#ifdef __ARMEL__
677	rev	$i2,$i2
678	rev	$i3,$i3
679#endif
680	str	$i2,[$key],#8
681	str	$i3,[$key,#-4]
682#endif
683
684	mov	$rounds,#14
685	str	$rounds,[$key,#240-32]
686	add	$t3,$tbl,#256			@ rcon
687	mov	lr,#255
688	mov	$rounds,#7
689
690.L256_loop:
691	and	$t2,lr,$i3,lsr#24
692	and	$i1,lr,$i3,lsr#16
693	ldrb	$t2,[$tbl,$t2]
694	and	$i2,lr,$i3,lsr#8
695	ldrb	$i1,[$tbl,$i1]
696	and	$i3,lr,$i3
697	ldrb	$i2,[$tbl,$i2]
698	orr	$t2,$t2,$i1,lsl#24
699	ldrb	$i3,[$tbl,$i3]
700	orr	$t2,$t2,$i2,lsl#16
701	ldr	$t1,[$t3],#4			@ rcon[i++]
702	orr	$t2,$t2,$i3,lsl#8
703	eor	$i3,$t2,$t1
704	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
705	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
706	str	$s0,[$key],#32
707	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
708	str	$s1,[$key,#-28]
709	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
710	str	$s2,[$key,#-24]
711	subs	$rounds,$rounds,#1
712	str	$s3,[$key,#-20]
713#ifdef	__thumb2__
714	itt	eq				@ Thumb2 thing, sanity check in ARM
715#endif
716	subeq	r2,$key,#256
717	beq	.Ldone
718
719	and	$t2,lr,$s3
720	and	$i1,lr,$s3,lsr#8
721	ldrb	$t2,[$tbl,$t2]
722	and	$i2,lr,$s3,lsr#16
723	ldrb	$i1,[$tbl,$i1]
724	and	$i3,lr,$s3,lsr#24
725	ldrb	$i2,[$tbl,$i2]
726	orr	$t2,$t2,$i1,lsl#8
727	ldrb	$i3,[$tbl,$i3]
728	orr	$t2,$t2,$i2,lsl#16
729	ldr	$t1,[$key,#-48]
730	orr	$t2,$t2,$i3,lsl#24
731
732	ldr	$i1,[$key,#-44]
733	ldr	$i2,[$key,#-40]
734	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
735	ldr	$i3,[$key,#-36]
736	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
737	str	$t1,[$key,#-16]
738	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
739	str	$i1,[$key,#-12]
740	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
741	str	$i2,[$key,#-8]
742	str	$i3,[$key,#-4]
743	b	.L256_loop
744
745.align	2
746.Ldone:	mov	r0,#0
747	ldmia   sp!,{r4-r12,lr}
748.Labrt:
749#if __ARM_ARCH__>=5
750	ret				@ bx lr
751#else
752	tst	lr,#1
753	moveq	pc,lr			@ be binary compatible with V4, yet
754	bx	lr			@ interoperable with Thumb ISA:-)
755#endif
756.size	aes_nohw_set_encrypt_key,.-aes_nohw_set_encrypt_key
757
758.global aes_nohw_set_decrypt_key
759.type   aes_nohw_set_decrypt_key,%function
760.align	5
761aes_nohw_set_decrypt_key:
762	str	lr,[sp,#-4]!            @ push lr
763	bl	_armv4_AES_set_encrypt_key
764	teq	r0,#0
765	ldr	lr,[sp],#4              @ pop lr
766	bne	.Labrt
767
768	mov	r0,r2			@ aes_nohw_set_encrypt_key preserves r2,
769	mov	r1,r2			@ which is AES_KEY *key
770	b	_armv4_AES_set_enc2dec_key
771.size	aes_nohw_set_decrypt_key,.-aes_nohw_set_decrypt_key
772
773@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
774.global	AES_set_enc2dec_key
775.type	AES_set_enc2dec_key,%function
776.align	5
777AES_set_enc2dec_key:
778_armv4_AES_set_enc2dec_key:
779	stmdb   sp!,{r4-r12,lr}
780
781	ldr	$rounds,[r0,#240]
782	mov	$i1,r0			@ input
783	add	$i2,r0,$rounds,lsl#4
784	mov	$key,r1			@ output
785	add	$tbl,r1,$rounds,lsl#4
786	str	$rounds,[r1,#240]
787
788.Linv:	ldr	$s0,[$i1],#16
789	ldr	$s1,[$i1,#-12]
790	ldr	$s2,[$i1,#-8]
791	ldr	$s3,[$i1,#-4]
792	ldr	$t1,[$i2],#-16
793	ldr	$t2,[$i2,#16+4]
794	ldr	$t3,[$i2,#16+8]
795	ldr	$i3,[$i2,#16+12]
796	str	$s0,[$tbl],#-16
797	str	$s1,[$tbl,#16+4]
798	str	$s2,[$tbl,#16+8]
799	str	$s3,[$tbl,#16+12]
800	str	$t1,[$key],#16
801	str	$t2,[$key,#-12]
802	str	$t3,[$key,#-8]
803	str	$i3,[$key,#-4]
804	teq	$i1,$i2
805	bne	.Linv
806
807	ldr	$s0,[$i1]
808	ldr	$s1,[$i1,#4]
809	ldr	$s2,[$i1,#8]
810	ldr	$s3,[$i1,#12]
811	str	$s0,[$key]
812	str	$s1,[$key,#4]
813	str	$s2,[$key,#8]
814	str	$s3,[$key,#12]
815	sub	$key,$key,$rounds,lsl#3
816___
# The key-inversion code that follows reuses the index registers as
# byte-lane masks: it loads 0x80808080 into $mask80, 0x1b1b1b1b into
# $mask1b and the complement of $mask80 into $mask7f.
$mask80=$i1;
$mask1b=$i2;
$mask7f=$i3;
820$code.=<<___;
821	ldr	$s0,[$key,#16]!		@ prefetch tp1
822	mov	$mask80,#0x80
823	mov	$mask1b,#0x1b
824	orr	$mask80,$mask80,#0x8000
825	orr	$mask1b,$mask1b,#0x1b00
826	orr	$mask80,$mask80,$mask80,lsl#16
827	orr	$mask1b,$mask1b,$mask1b,lsl#16
828	sub	$rounds,$rounds,#1
829	mvn	$mask7f,$mask80
830	mov	$rounds,$rounds,lsl#2	@ (rounds-1)*4
831
832.Lmix:	and	$t1,$s0,$mask80
833	and	$s1,$s0,$mask7f
834	sub	$t1,$t1,$t1,lsr#7
835	and	$t1,$t1,$mask1b
836	eor	$s1,$t1,$s1,lsl#1	@ tp2
837
838	and	$t1,$s1,$mask80
839	and	$s2,$s1,$mask7f
840	sub	$t1,$t1,$t1,lsr#7
841	and	$t1,$t1,$mask1b
842	eor	$s2,$t1,$s2,lsl#1	@ tp4
843
844	and	$t1,$s2,$mask80
845	and	$s3,$s2,$mask7f
846	sub	$t1,$t1,$t1,lsr#7
847	and	$t1,$t1,$mask1b
848	eor	$s3,$t1,$s3,lsl#1	@ tp8
849
850	eor	$t1,$s1,$s2
851	eor	$t2,$s0,$s3		@ tp9
852	eor	$t1,$t1,$s3		@ tpe
853	eor	$t1,$t1,$s1,ror#24
854	eor	$t1,$t1,$t2,ror#24	@ ^= ROTATE(tpb=tp9^tp2,8)
855	eor	$t1,$t1,$s2,ror#16
856	eor	$t1,$t1,$t2,ror#16	@ ^= ROTATE(tpd=tp9^tp4,16)
857	eor	$t1,$t1,$t2,ror#8	@ ^= ROTATE(tp9,24)
858
859	ldr	$s0,[$key,#4]		@ prefetch tp1
860	str	$t1,[$key],#4
861	subs	$rounds,$rounds,#1
862	bne	.Lmix
863
864	mov	r0,#0
865#if __ARM_ARCH__>=5
866	ldmia	sp!,{r4-r12,pc}
867#else
868	ldmia   sp!,{r4-r12,lr}
869	tst	lr,#1
870	moveq	pc,lr			@ be binary compatible with V4, yet
871	bx	lr			@ interoperable with Thumb ISA:-)
872#endif
873.size	AES_set_enc2dec_key,.-AES_set_enc2dec_key
874
875.type	AES_Td,%object
876.align	5
877AES_Td:
878.word	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
879.word	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
880.word	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
881.word	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
882.word	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
883.word	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
884.word	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
885.word	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
886.word	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
887.word	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
888.word	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
889.word	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
890.word	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
891.word	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
892.word	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
893.word	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
894.word	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
895.word	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
896.word	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
897.word	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
898.word	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
899.word	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
900.word	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
901.word	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
902.word	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
903.word	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
904.word	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
905.word	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
906.word	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
907.word	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
908.word	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
909.word	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
910.word	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
911.word	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
912.word	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
913.word	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
914.word	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
915.word	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
916.word	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
917.word	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
918.word	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
919.word	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
920.word	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
921.word	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
922.word	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
923.word	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
924.word	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
925.word	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
926.word	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
927.word	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
928.word	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
929.word	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
930.word	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
931.word	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
932.word	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
933.word	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
934.word	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
935.word	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
936.word	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
937.word	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
938.word	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
939.word	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
940.word	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
941.word	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
942@ Td4[256]
943.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
944.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
945.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
946.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
947.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
948.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
949.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
950.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
951.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
952.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
953.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
954.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
955.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
956.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
957.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
958.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
959.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
960.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
961.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
962.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
963.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
964.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
965.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
966.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
967.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
968.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
969.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
970.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
971.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
972.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
973.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
974.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
975.size	AES_Td,.-AES_Td
976
977@ void aes_nohw_decrypt(const unsigned char *in, unsigned char *out,
978@ 		                  const AES_KEY *key) {
@
@ Decrypts one 16-byte block read from in and writes the 16-byte result
@ to out, using the key schedule pointed to by key.  The arguments
@ arrive in r0 (in), r1 (out) and r2 (key).  This wrapper saves
@ registers, locates the AES_Td table, loads the block as four words
@ with the first byte of each group of four in the most significant
@ position, calls _armv4_AES_decrypt for the rounds, and stores the
@ four result words back to out in the same byte order.
979.global aes_nohw_decrypt
980.type   aes_nohw_decrypt,%function
981.align	5
982aes_nohw_decrypt:
@ r3 = address of this entry point.  In ARM state pc reads as the
@ address of the current instruction plus 8, hence the subtraction.
983#ifndef	__thumb2__
984	sub	r3,pc,#8		@ aes_nohw_decrypt
985#else
986	adr	r3,.
987#endif
988	stmdb   sp!,{r1,r4-r12,lr}	@ save out pointer (r1), r4-r12 and lr
@ Point the table register at AES_Td, pc-relative in both variants:
@ directly with adr, or as r3 minus the distance between this function
@ and the table.
989#if defined(__thumb2__) || defined(__APPLE__)
990	adr	$tbl,AES_Td
991#else
992	sub	$tbl,r3,#aes_nohw_decrypt-AES_Td	@ Td
993#endif
994	mov	$rounds,r0		@ inp
995	mov	$key,r2			@ key schedule pointer
@ Below architecture level 7 the block is read one byte at a time and
@ each word is assembled with orr and shifts; otherwise four word loads
@ are used and byte-reversed on little-endian builds.
996#if __ARM_ARCH__<7
997	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
998	ldrb	$t1,[$rounds,#2]	@ manner...
999	ldrb	$t2,[$rounds,#1]
1000	ldrb	$t3,[$rounds,#0]
1001	orr	$s0,$s0,$t1,lsl#8
1002	ldrb	$s1,[$rounds,#7]
1003	orr	$s0,$s0,$t2,lsl#16
1004	ldrb	$t1,[$rounds,#6]
1005	orr	$s0,$s0,$t3,lsl#24
1006	ldrb	$t2,[$rounds,#5]
1007	ldrb	$t3,[$rounds,#4]
1008	orr	$s1,$s1,$t1,lsl#8
1009	ldrb	$s2,[$rounds,#11]
1010	orr	$s1,$s1,$t2,lsl#16
1011	ldrb	$t1,[$rounds,#10]
1012	orr	$s1,$s1,$t3,lsl#24
1013	ldrb	$t2,[$rounds,#9]
1014	ldrb	$t3,[$rounds,#8]
1015	orr	$s2,$s2,$t1,lsl#8
1016	ldrb	$s3,[$rounds,#15]
1017	orr	$s2,$s2,$t2,lsl#16
1018	ldrb	$t1,[$rounds,#14]
1019	orr	$s2,$s2,$t3,lsl#24
1020	ldrb	$t2,[$rounds,#13]
1021	ldrb	$t3,[$rounds,#12]
1022	orr	$s3,$s3,$t1,lsl#8
1023	orr	$s3,$s3,$t2,lsl#16
1024	orr	$s3,$s3,$t3,lsl#24
1025#else
1026	ldr	$s0,[$rounds,#0]	@ whole-word loads of the block
1027	ldr	$s1,[$rounds,#4]
1028	ldr	$s2,[$rounds,#8]
1029	ldr	$s3,[$rounds,#12]
1030#ifdef __ARMEL__
1031	rev	$s0,$s0			@ same word layout as the byte-wise path
1032	rev	$s1,$s1
1033	rev	$s2,$s2
1034	rev	$s3,$s3
1035#endif
1036#endif
1037	bl	_armv4_AES_decrypt	@ run the rounds on the four state words
1038
1039	ldr	$rounds,[sp],#4		@ pop out
1040#if __ARM_ARCH__>=7
1041#ifdef __ARMEL__
1042	rev	$s0,$s0			@ undo the load-time byte reversal
1043	rev	$s1,$s1
1044	rev	$s2,$s2
1045	rev	$s3,$s3
1046#endif
1047	str	$s0,[$rounds,#0]
1048	str	$s1,[$rounds,#4]
1049	str	$s2,[$rounds,#8]
1050	str	$s3,[$rounds,#12]
1051#else
1052	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
1053	mov	$t2,$s0,lsr#16		@ manner...
1054	mov	$t3,$s0,lsr#8
1055	strb	$t1,[$rounds,#0]
1056	strb	$t2,[$rounds,#1]
1057	mov	$t1,$s1,lsr#24
1058	strb	$t3,[$rounds,#2]
1059	mov	$t2,$s1,lsr#16
1060	strb	$s0,[$rounds,#3]
1061	mov	$t3,$s1,lsr#8
1062	strb	$t1,[$rounds,#4]
1063	strb	$t2,[$rounds,#5]
1064	mov	$t1,$s2,lsr#24
1065	strb	$t3,[$rounds,#6]
1066	mov	$t2,$s2,lsr#16
1067	strb	$s1,[$rounds,#7]
1068	mov	$t3,$s2,lsr#8
1069	strb	$t1,[$rounds,#8]
1070	strb	$t2,[$rounds,#9]
1071	mov	$t1,$s3,lsr#24
1072	strb	$t3,[$rounds,#10]
1073	mov	$t2,$s3,lsr#16
1074	strb	$s2,[$rounds,#11]
1075	mov	$t3,$s3,lsr#8
1076	strb	$t1,[$rounds,#12]
1077	strb	$t2,[$rounds,#13]
1078	strb	$t3,[$rounds,#14]
1079	strb	$s3,[$rounds,#15]
1080#endif
@ Restore the saved registers and return.  From architecture level 5 on
@ the return address is popped straight into pc.  Otherwise it is
@ popped into lr and bit 0 selects between a plain move to pc and bx.
@ The generator script later rewrites the bx lr text into a raw .word
@ so the output still assembles with -march=armv4.
1081#if __ARM_ARCH__>=5
1082	ldmia	sp!,{r4-r12,pc}
1083#else
1084	ldmia   sp!,{r4-r12,lr}
1085	tst	lr,#1			@ bit 0 of the return address
1086	moveq	pc,lr			@ be binary compatible with V4, yet
1087	bx	lr			@ interoperable with Thumb ISA:-)
1088#endif
1089.size	aes_nohw_decrypt,.-aes_nohw_decrypt
1090
1091.type   _armv4_AES_decrypt,%function
1092.align	2
@ Decryption core, reached with bl from aes_nohw_decrypt.
@
@ On entry the four state words are in the s0-s3 registers, the key
@ register points at the key schedule and the table register points at
@ AES_Td.  On exit s0-s3 hold the four result words, the key register
@ is left pointing at the last round key and the table register again
@ points at AES_Td.  The t1-t3, i1-i3 and rounds registers are scratch;
@ lr is saved on the stack and then used as the byte mask 255.
1093_armv4_AES_decrypt:
1094	str	lr,[sp,#-4]!		@ push lr
1095	ldmia	$key!,{$t1-$i1}		@ first round key words, with writeback
1096	eor	$s0,$s0,$t1
1097	ldr	$rounds,[$key,#240-16]	@ round count at offset 240 of the key structure
1098	eor	$s1,$s1,$t2
1099	eor	$s2,$s2,$t3
1100	eor	$s3,$s3,$i1
1101	sub	$rounds,$rounds,#1	@ the last round is done after the loop
1102	mov	lr,#255			@ byte mask
1103
@ Split s0 into byte indices for the first pass through the loop.
1104	and	$i1,lr,$s0,lsr#16
1105	and	$i2,lr,$s0,lsr#8
1106	and	$i3,lr,$s0
1107	mov	$s0,$s0,lsr#24
@ Main rounds.  Every lookup reads a word from the same table at the
@ table register; the ror amounts applied when the words are combined
@ stand in for the separate Td1, Td2 and Td3 tables named in the notes.
@ The tail of each iteration adds the next round key and already
@ extracts the s0 byte indices for the following iteration.
1108.Ldec_loop:
1109	ldr	$t1,[$tbl,$i1,lsl#2]	@ Td1[s0>>16]
1110	and	$i1,lr,$s1		@ i0
1111	ldr	$t2,[$tbl,$i2,lsl#2]	@ Td2[s0>>8]
1112	and	$i2,lr,$s1,lsr#16
1113	ldr	$t3,[$tbl,$i3,lsl#2]	@ Td3[s0>>0]
1114	and	$i3,lr,$s1,lsr#8
1115	ldr	$s0,[$tbl,$s0,lsl#2]	@ Td0[s0>>24]
1116	mov	$s1,$s1,lsr#24
1117
1118	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td3[s1>>0]
1119	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td1[s1>>16]
1120	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td2[s1>>8]
1121	eor	$s0,$s0,$i1,ror#24
1122	ldr	$s1,[$tbl,$s1,lsl#2]	@ Td0[s1>>24]
1123	and	$i1,lr,$s2,lsr#8	@ i0
1124	eor	$t2,$i2,$t2,ror#8
1125	and	$i2,lr,$s2		@ i1
1126	eor	$t3,$i3,$t3,ror#8
1127	and	$i3,lr,$s2,lsr#16
1128	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td2[s2>>8]
1129	eor	$s1,$s1,$t1,ror#8
1130	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td3[s2>>0]
1131	mov	$s2,$s2,lsr#24
1132
1133	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td1[s2>>16]
1134	eor	$s0,$s0,$i1,ror#16
1135	ldr	$s2,[$tbl,$s2,lsl#2]	@ Td0[s2>>24]
1136	and	$i1,lr,$s3,lsr#16	@ i0
1137	eor	$s1,$s1,$i2,ror#24
1138	and	$i2,lr,$s3,lsr#8	@ i1
1139	eor	$t3,$i3,$t3,ror#8
1140	and	$i3,lr,$s3		@ i2
1141	ldr	$i1,[$tbl,$i1,lsl#2]	@ Td1[s3>>16]
1142	eor	$s2,$s2,$t2,ror#8
1143	ldr	$i2,[$tbl,$i2,lsl#2]	@ Td2[s3>>8]
1144	mov	$s3,$s3,lsr#24
1145
1146	ldr	$i3,[$tbl,$i3,lsl#2]	@ Td3[s3>>0]
1147	eor	$s0,$s0,$i1,ror#8
1148	ldr	$i1,[$key],#16		@ round key word 0, key pointer += 16
1149	eor	$s1,$s1,$i2,ror#16
1150	ldr	$s3,[$tbl,$s3,lsl#2]	@ Td0[s3>>24]
1151	eor	$s2,$s2,$i3,ror#24
1152
1153	ldr	$t1,[$key,#-12]		@ round key word 1
1154	eor	$s0,$s0,$i1
1155	ldr	$t2,[$key,#-8]		@ round key word 2
1156	eor	$s3,$s3,$t3,ror#8
1157	ldr	$t3,[$key,#-4]		@ round key word 3
1158	and	$i1,lr,$s0,lsr#16
1159	eor	$s1,$s1,$t1
1160	and	$i2,lr,$s0,lsr#8
1161	eor	$s2,$s2,$t2
1162	and	$i3,lr,$s0
1163	eor	$s3,$s3,$t3
1164	mov	$s0,$s0,lsr#24
1165
1166	subs	$rounds,$rounds,#1
1167	bne	.Ldec_loop		@ repeat until only the last round is left
1168
1169	add	$tbl,$tbl,#1024		@ step over the 1K word table to Td4
1170
@ The eight loads below touch Td4 at 32-byte steps across 256 bytes.
@ The loaded values are overwritten before any use.  Presumably this
@ brings the byte table into cache ahead of the data-dependent lookups
@ (TODO confirm intent).
1171	ldr	$t2,[$tbl,#0]		@ prefetch Td4
1172	ldr	$t3,[$tbl,#32]
1173	ldr	$t1,[$tbl,#64]
1174	ldr	$t2,[$tbl,#96]
1175	ldr	$t3,[$tbl,#128]
1176	ldr	$t1,[$tbl,#160]
1177	ldr	$t2,[$tbl,#192]
1178	ldr	$t3,[$tbl,#224]
1179
@ Last round: single-byte lookups in Td4, reassembled into words with
@ lsl shifts instead of rotated word lookups.
1180	ldrb	$s0,[$tbl,$s0]		@ Td4[s0>>24]
1181	ldrb	$t1,[$tbl,$i1]		@ Td4[s0>>16]
1182	and	$i1,lr,$s1		@ i0
1183	ldrb	$t2,[$tbl,$i2]		@ Td4[s0>>8]
1184	and	$i2,lr,$s1,lsr#16
1185	ldrb	$t3,[$tbl,$i3]		@ Td4[s0>>0]
1186	and	$i3,lr,$s1,lsr#8
1187
1188	add	$s1,$tbl,$s1,lsr#24	@ address of the Td4 entry for the top byte
1189	ldrb	$i1,[$tbl,$i1]		@ Td4[s1>>0]
1190	ldrb	$s1,[$s1]		@ Td4[s1>>24]
1191	ldrb	$i2,[$tbl,$i2]		@ Td4[s1>>16]
1192	eor	$s0,$i1,$s0,lsl#24
1193	ldrb	$i3,[$tbl,$i3]		@ Td4[s1>>8]
1194	eor	$s1,$t1,$s1,lsl#8
1195	and	$i1,lr,$s2,lsr#8	@ i0
1196	eor	$t2,$t2,$i2,lsl#8
1197	and	$i2,lr,$s2		@ i1
1198	ldrb	$i1,[$tbl,$i1]		@ Td4[s2>>8]
1199	eor	$t3,$t3,$i3,lsl#8
1200	ldrb	$i2,[$tbl,$i2]		@ Td4[s2>>0]
1201	and	$i3,lr,$s2,lsr#16
1202
1203	add	$s2,$tbl,$s2,lsr#24	@ address of the Td4 entry for the top byte
1204	ldrb	$s2,[$s2]		@ Td4[s2>>24]
1205	eor	$s0,$s0,$i1,lsl#8
1206	ldrb	$i3,[$tbl,$i3]		@ Td4[s2>>16]
1207	eor	$s1,$i2,$s1,lsl#16
1208	and	$i1,lr,$s3,lsr#16	@ i0
1209	eor	$s2,$t2,$s2,lsl#16
1210	and	$i2,lr,$s3,lsr#8	@ i1
1211	ldrb	$i1,[$tbl,$i1]		@ Td4[s3>>16]
1212	eor	$t3,$t3,$i3,lsl#16
1213	ldrb	$i2,[$tbl,$i2]		@ Td4[s3>>8]
1214	and	$i3,lr,$s3		@ i2
1215
1216	add	$s3,$tbl,$s3,lsr#24	@ address of the Td4 entry for the top byte
1217	ldrb	$i3,[$tbl,$i3]		@ Td4[s3>>0]
1218	ldrb	$s3,[$s3]		@ Td4[s3>>24]
1219	eor	$s0,$s0,$i1,lsl#16
1220	ldr	$i1,[$key,#0]		@ last round key, no writeback
1221	eor	$s1,$s1,$i2,lsl#8
1222	ldr	$t1,[$key,#4]
1223	eor	$s2,$i3,$s2,lsl#8
1224	ldr	$t2,[$key,#8]
1225	eor	$s3,$t3,$s3,lsl#24
1226	ldr	$t3,[$key,#12]
1227
1228	eor	$s0,$s0,$i1
1229	eor	$s1,$s1,$t1
1230	eor	$s2,$s2,$t2
1231	eor	$s3,$s3,$t3
1232
1233	sub	$tbl,$tbl,#1024		@ table register back to AES_Td
1234	ldr	pc,[sp],#4		@ pop and return
1235.size	_armv4_AES_decrypt,.-_armv4_AES_decrypt
1236.asciz	"AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1237.align	2
1238___
1239
# Post-process the generated assembly text held in $code.
#
# First every literal "bx lr" is replaced by its raw instruction encoding.
# Only afterwards is the "ret" pseudo-mnemonic expanded to "bx lr", so the
# text produced by that expansion is not re-encoded into a .word.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx\tlr/gm;

# Copy this script's leading comment block to the output as assembler
# comments: the shebang line is skipped, a leading '#' becomes '@', blank
# lines are passed through, and copying stops at the first line that is
# neither a comment nor blank.
#
# The three-argument open treats $0 strictly as a file name; the two-argument
# form would interpret mode characters or surrounding whitespace in the path.
# A failure to open stays non-fatal (the header is simply omitted, as
# before) but is now reported instead of being silently ignored.
if (open(my $self, '<', $0)) {
	while (my $line = <$self>) {
		next if ($line =~ /^#!/);
		last if (!($line =~ s/^#/@/) and $line !~ /^$/);
		print $line;
	}
	close($self);
} else {
	warn "cannot open $0 to copy the header comment: $!";
}

print $code;
close STDOUT or die "error closing STDOUT: $!";	# enforce flush
1253