1#include "s390x_arch.h"
2
3.text
4
5.type	AES_Te,@object
6.align	256
7AES_Te:
8.long	0xc66363a5,0xc66363a5
9.long	0xf87c7c84,0xf87c7c84
10.long	0xee777799,0xee777799
11.long	0xf67b7b8d,0xf67b7b8d
12.long	0xfff2f20d,0xfff2f20d
13.long	0xd66b6bbd,0xd66b6bbd
14.long	0xde6f6fb1,0xde6f6fb1
15.long	0x91c5c554,0x91c5c554
16.long	0x60303050,0x60303050
17.long	0x02010103,0x02010103
18.long	0xce6767a9,0xce6767a9
19.long	0x562b2b7d,0x562b2b7d
20.long	0xe7fefe19,0xe7fefe19
21.long	0xb5d7d762,0xb5d7d762
22.long	0x4dababe6,0x4dababe6
23.long	0xec76769a,0xec76769a
24.long	0x8fcaca45,0x8fcaca45
25.long	0x1f82829d,0x1f82829d
26.long	0x89c9c940,0x89c9c940
27.long	0xfa7d7d87,0xfa7d7d87
28.long	0xeffafa15,0xeffafa15
29.long	0xb25959eb,0xb25959eb
30.long	0x8e4747c9,0x8e4747c9
31.long	0xfbf0f00b,0xfbf0f00b
32.long	0x41adadec,0x41adadec
33.long	0xb3d4d467,0xb3d4d467
34.long	0x5fa2a2fd,0x5fa2a2fd
35.long	0x45afafea,0x45afafea
36.long	0x239c9cbf,0x239c9cbf
37.long	0x53a4a4f7,0x53a4a4f7
38.long	0xe4727296,0xe4727296
39.long	0x9bc0c05b,0x9bc0c05b
40.long	0x75b7b7c2,0x75b7b7c2
41.long	0xe1fdfd1c,0xe1fdfd1c
42.long	0x3d9393ae,0x3d9393ae
43.long	0x4c26266a,0x4c26266a
44.long	0x6c36365a,0x6c36365a
45.long	0x7e3f3f41,0x7e3f3f41
46.long	0xf5f7f702,0xf5f7f702
47.long	0x83cccc4f,0x83cccc4f
48.long	0x6834345c,0x6834345c
49.long	0x51a5a5f4,0x51a5a5f4
50.long	0xd1e5e534,0xd1e5e534
51.long	0xf9f1f108,0xf9f1f108
52.long	0xe2717193,0xe2717193
53.long	0xabd8d873,0xabd8d873
54.long	0x62313153,0x62313153
55.long	0x2a15153f,0x2a15153f
56.long	0x0804040c,0x0804040c
57.long	0x95c7c752,0x95c7c752
58.long	0x46232365,0x46232365
59.long	0x9dc3c35e,0x9dc3c35e
60.long	0x30181828,0x30181828
61.long	0x379696a1,0x379696a1
62.long	0x0a05050f,0x0a05050f
63.long	0x2f9a9ab5,0x2f9a9ab5
64.long	0x0e070709,0x0e070709
65.long	0x24121236,0x24121236
66.long	0x1b80809b,0x1b80809b
67.long	0xdfe2e23d,0xdfe2e23d
68.long	0xcdebeb26,0xcdebeb26
69.long	0x4e272769,0x4e272769
70.long	0x7fb2b2cd,0x7fb2b2cd
71.long	0xea75759f,0xea75759f
72.long	0x1209091b,0x1209091b
73.long	0x1d83839e,0x1d83839e
74.long	0x582c2c74,0x582c2c74
75.long	0x341a1a2e,0x341a1a2e
76.long	0x361b1b2d,0x361b1b2d
77.long	0xdc6e6eb2,0xdc6e6eb2
78.long	0xb45a5aee,0xb45a5aee
79.long	0x5ba0a0fb,0x5ba0a0fb
80.long	0xa45252f6,0xa45252f6
81.long	0x763b3b4d,0x763b3b4d
82.long	0xb7d6d661,0xb7d6d661
83.long	0x7db3b3ce,0x7db3b3ce
84.long	0x5229297b,0x5229297b
85.long	0xdde3e33e,0xdde3e33e
86.long	0x5e2f2f71,0x5e2f2f71
87.long	0x13848497,0x13848497
88.long	0xa65353f5,0xa65353f5
89.long	0xb9d1d168,0xb9d1d168
90.long	0x00000000,0x00000000
91.long	0xc1eded2c,0xc1eded2c
92.long	0x40202060,0x40202060
93.long	0xe3fcfc1f,0xe3fcfc1f
94.long	0x79b1b1c8,0x79b1b1c8
95.long	0xb65b5bed,0xb65b5bed
96.long	0xd46a6abe,0xd46a6abe
97.long	0x8dcbcb46,0x8dcbcb46
98.long	0x67bebed9,0x67bebed9
99.long	0x7239394b,0x7239394b
100.long	0x944a4ade,0x944a4ade
101.long	0x984c4cd4,0x984c4cd4
102.long	0xb05858e8,0xb05858e8
103.long	0x85cfcf4a,0x85cfcf4a
104.long	0xbbd0d06b,0xbbd0d06b
105.long	0xc5efef2a,0xc5efef2a
106.long	0x4faaaae5,0x4faaaae5
107.long	0xedfbfb16,0xedfbfb16
108.long	0x864343c5,0x864343c5
109.long	0x9a4d4dd7,0x9a4d4dd7
110.long	0x66333355,0x66333355
111.long	0x11858594,0x11858594
112.long	0x8a4545cf,0x8a4545cf
113.long	0xe9f9f910,0xe9f9f910
114.long	0x04020206,0x04020206
115.long	0xfe7f7f81,0xfe7f7f81
116.long	0xa05050f0,0xa05050f0
117.long	0x783c3c44,0x783c3c44
118.long	0x259f9fba,0x259f9fba
119.long	0x4ba8a8e3,0x4ba8a8e3
120.long	0xa25151f3,0xa25151f3
121.long	0x5da3a3fe,0x5da3a3fe
122.long	0x804040c0,0x804040c0
123.long	0x058f8f8a,0x058f8f8a
124.long	0x3f9292ad,0x3f9292ad
125.long	0x219d9dbc,0x219d9dbc
126.long	0x70383848,0x70383848
127.long	0xf1f5f504,0xf1f5f504
128.long	0x63bcbcdf,0x63bcbcdf
129.long	0x77b6b6c1,0x77b6b6c1
130.long	0xafdada75,0xafdada75
131.long	0x42212163,0x42212163
132.long	0x20101030,0x20101030
133.long	0xe5ffff1a,0xe5ffff1a
134.long	0xfdf3f30e,0xfdf3f30e
135.long	0xbfd2d26d,0xbfd2d26d
136.long	0x81cdcd4c,0x81cdcd4c
137.long	0x180c0c14,0x180c0c14
138.long	0x26131335,0x26131335
139.long	0xc3ecec2f,0xc3ecec2f
140.long	0xbe5f5fe1,0xbe5f5fe1
141.long	0x359797a2,0x359797a2
142.long	0x884444cc,0x884444cc
143.long	0x2e171739,0x2e171739
144.long	0x93c4c457,0x93c4c457
145.long	0x55a7a7f2,0x55a7a7f2
146.long	0xfc7e7e82,0xfc7e7e82
147.long	0x7a3d3d47,0x7a3d3d47
148.long	0xc86464ac,0xc86464ac
149.long	0xba5d5de7,0xba5d5de7
150.long	0x3219192b,0x3219192b
151.long	0xe6737395,0xe6737395
152.long	0xc06060a0,0xc06060a0
153.long	0x19818198,0x19818198
154.long	0x9e4f4fd1,0x9e4f4fd1
155.long	0xa3dcdc7f,0xa3dcdc7f
156.long	0x44222266,0x44222266
157.long	0x542a2a7e,0x542a2a7e
158.long	0x3b9090ab,0x3b9090ab
159.long	0x0b888883,0x0b888883
160.long	0x8c4646ca,0x8c4646ca
161.long	0xc7eeee29,0xc7eeee29
162.long	0x6bb8b8d3,0x6bb8b8d3
163.long	0x2814143c,0x2814143c
164.long	0xa7dede79,0xa7dede79
165.long	0xbc5e5ee2,0xbc5e5ee2
166.long	0x160b0b1d,0x160b0b1d
167.long	0xaddbdb76,0xaddbdb76
168.long	0xdbe0e03b,0xdbe0e03b
169.long	0x64323256,0x64323256
170.long	0x743a3a4e,0x743a3a4e
171.long	0x140a0a1e,0x140a0a1e
172.long	0x924949db,0x924949db
173.long	0x0c06060a,0x0c06060a
174.long	0x4824246c,0x4824246c
175.long	0xb85c5ce4,0xb85c5ce4
176.long	0x9fc2c25d,0x9fc2c25d
177.long	0xbdd3d36e,0xbdd3d36e
178.long	0x43acacef,0x43acacef
179.long	0xc46262a6,0xc46262a6
180.long	0x399191a8,0x399191a8
181.long	0x319595a4,0x319595a4
182.long	0xd3e4e437,0xd3e4e437
183.long	0xf279798b,0xf279798b
184.long	0xd5e7e732,0xd5e7e732
185.long	0x8bc8c843,0x8bc8c843
186.long	0x6e373759,0x6e373759
187.long	0xda6d6db7,0xda6d6db7
188.long	0x018d8d8c,0x018d8d8c
189.long	0xb1d5d564,0xb1d5d564
190.long	0x9c4e4ed2,0x9c4e4ed2
191.long	0x49a9a9e0,0x49a9a9e0
192.long	0xd86c6cb4,0xd86c6cb4
193.long	0xac5656fa,0xac5656fa
194.long	0xf3f4f407,0xf3f4f407
195.long	0xcfeaea25,0xcfeaea25
196.long	0xca6565af,0xca6565af
197.long	0xf47a7a8e,0xf47a7a8e
198.long	0x47aeaee9,0x47aeaee9
199.long	0x10080818,0x10080818
200.long	0x6fbabad5,0x6fbabad5
201.long	0xf0787888,0xf0787888
202.long	0x4a25256f,0x4a25256f
203.long	0x5c2e2e72,0x5c2e2e72
204.long	0x381c1c24,0x381c1c24
205.long	0x57a6a6f1,0x57a6a6f1
206.long	0x73b4b4c7,0x73b4b4c7
207.long	0x97c6c651,0x97c6c651
208.long	0xcbe8e823,0xcbe8e823
209.long	0xa1dddd7c,0xa1dddd7c
210.long	0xe874749c,0xe874749c
211.long	0x3e1f1f21,0x3e1f1f21
212.long	0x964b4bdd,0x964b4bdd
213.long	0x61bdbddc,0x61bdbddc
214.long	0x0d8b8b86,0x0d8b8b86
215.long	0x0f8a8a85,0x0f8a8a85
216.long	0xe0707090,0xe0707090
217.long	0x7c3e3e42,0x7c3e3e42
218.long	0x71b5b5c4,0x71b5b5c4
219.long	0xcc6666aa,0xcc6666aa
220.long	0x904848d8,0x904848d8
221.long	0x06030305,0x06030305
222.long	0xf7f6f601,0xf7f6f601
223.long	0x1c0e0e12,0x1c0e0e12
224.long	0xc26161a3,0xc26161a3
225.long	0x6a35355f,0x6a35355f
226.long	0xae5757f9,0xae5757f9
227.long	0x69b9b9d0,0x69b9b9d0
228.long	0x17868691,0x17868691
229.long	0x99c1c158,0x99c1c158
230.long	0x3a1d1d27,0x3a1d1d27
231.long	0x279e9eb9,0x279e9eb9
232.long	0xd9e1e138,0xd9e1e138
233.long	0xebf8f813,0xebf8f813
234.long	0x2b9898b3,0x2b9898b3
235.long	0x22111133,0x22111133
236.long	0xd26969bb,0xd26969bb
237.long	0xa9d9d970,0xa9d9d970
238.long	0x078e8e89,0x078e8e89
239.long	0x339494a7,0x339494a7
240.long	0x2d9b9bb6,0x2d9b9bb6
241.long	0x3c1e1e22,0x3c1e1e22
242.long	0x15878792,0x15878792
243.long	0xc9e9e920,0xc9e9e920
244.long	0x87cece49,0x87cece49
245.long	0xaa5555ff,0xaa5555ff
246.long	0x50282878,0x50282878
247.long	0xa5dfdf7a,0xa5dfdf7a
248.long	0x038c8c8f,0x038c8c8f
249.long	0x59a1a1f8,0x59a1a1f8
250.long	0x09898980,0x09898980
251.long	0x1a0d0d17,0x1a0d0d17
252.long	0x65bfbfda,0x65bfbfda
253.long	0xd7e6e631,0xd7e6e631
254.long	0x844242c6,0x844242c6
255.long	0xd06868b8,0xd06868b8
256.long	0x824141c3,0x824141c3
257.long	0x299999b0,0x299999b0
258.long	0x5a2d2d77,0x5a2d2d77
259.long	0x1e0f0f11,0x1e0f0f11
260.long	0x7bb0b0cb,0x7bb0b0cb
261.long	0xa85454fc,0xa85454fc
262.long	0x6dbbbbd6,0x6dbbbbd6
263.long	0x2c16163a,0x2c16163a
264# Te4[256]
265.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
266.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
267.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
268.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
269.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
270.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
271.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
272.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
273.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
274.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
275.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
276.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
277.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
278.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
279.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
280.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
281.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
282.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
283.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
284.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
285.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
286.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
287.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
288.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
289.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
290.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
291.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
292.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
293.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
294.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
295.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
296.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
297# rcon[]
298.long	0x01000000, 0x02000000, 0x04000000, 0x08000000
299.long	0x10000000, 0x20000000, 0x40000000, 0x80000000
300.long	0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
301.align	256
302.size	AES_Te,.-AES_Te
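# Layout note: AES_Te packs everything the scalar code needs into one
# 256-byte-aligned object.  Each of the 256 Te0 entries is stored twice
# (8 bytes per entry), so a big-endian 4-byte load at byte offset 0/3/2/1
# within an entry delivers the value rotated by 0/24/16/8 bits, i.e. it
# doubles as Te0/Te1/Te2/Te3 without separate tables; the 0x7f8 masks in
# the round code pre-scale the byte indices by 8 to match.  The plain
# S-box bytes ("Te4") follow at offset 2048 and the rcon words at offset
# 2048+256, which is where the key schedule picks them up
# (larl %r12,AES_Te+2048).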
303
304# void AES_encrypt(const unsigned char *inp, unsigned char *out,
305# 		 const AES_KEY *key) {
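# 240(%r4) is the AES_KEY "rounds" slot.  AES_set_encrypt_key stores the
# CPACF km function code there (18/19/20 for AES-128/192/256) when the
# hardware path is usable, and the plain round count (10/12/14) otherwise,
# so "less than 16" selects the software fallback below.  The
# .long 0xb92e0042 is km %r4,%r2 hand-assembled; it is simply retried on
# partial completion.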
306.globl	AES_encrypt
307.type	AES_encrypt,@function
308AES_encrypt:
309	l	%r0,240(%r4)
310	lhi	%r1,16
311	clr	%r0,%r1
312	jl	.Lesoft
313
314	la	%r1,0(%r4)
315	#la	%r2,0(%r2)
316	la	%r4,0(%r3)
317	lghi	%r3,16		# single block length
318	.long	0xb92e0042	# km %r4,%r2
319	brc	1,.-4		# can this happen?
320	br	%r14
321.align	64
322.Lesoft:
323	stmg	%r3,%r14,3*8(%r15)
324
325	llgf	%r8,0(%r2)
326	llgf	%r9,4(%r2)
327	llgf	%r10,8(%r2)
328	llgf	%r11,12(%r2)
329
330	larl	%r12,AES_Te
331	bras	%r14,_s390x_AES_encrypt
332
333	lg	%r3,3*8(%r15)
334	st	%r8,0(%r3)
335	st	%r9,4(%r3)
336	st	%r10,8(%r3)
337	st	%r11,12(%r3)
338
339	lmg	%r6,%r14,6*8(%r15)
340	br	%r14
341.size	AES_encrypt,.-AES_encrypt
342
343.type   _s390x_AES_encrypt,@function
344.align	16
345_s390x_AES_encrypt:
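	# scalar encryption core: state words s0..s3 live in %r8..%r11,
	# %r4 points at the round keys (advanced by 16 each round), %r12
	# at AES_Te, and %r13 holds rounds-1 full rounds before the final
	# substitution round.  %r0 = 2040 (0x7f8) masks out a byte index
	# already multiplied by 8, matching the doubled 8-byte table entries.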
346	stg	%r14,15*8(%r15)
347	x	%r8,0(%r4)
348	x	%r9,4(%r4)
349	x	%r10,8(%r4)
350	x	%r11,12(%r4)
351	l	%r13,240(%r4)
352	llill	%r0,2040
353	aghi	%r13,-1
354	j	.Lenc_loop
355.align	16
356.Lenc_loop:
357	sllg	%r1,%r8,3
358	srlg	%r2,%r8,5
359	srlg	%r3,%r8,13
360	srl	%r8,21
361	nr	%r8,%r0
362	ngr	%r1,%r0
363	nr	%r2,%r0
364	nr	%r3,%r0
365
366	srlg	%r5,%r9,13	# i0
367	sllg	%r6,%r9,3
368	srlg	%r7,%r9,5
369	srl	%r9,21
370	nr	%r5,%r0
371	nr	%r9,%r0
372	ngr	%r6,%r0
373	nr	%r7,%r0
374
375	l	%r8,0(%r8,%r12)	# Te0[s0>>24]
376	l	%r1,1(%r1,%r12)	# Te3[s0>>0]
377	l	%r2,2(%r2,%r12) # Te2[s0>>8]
378	l	%r3,3(%r3,%r12)	# Te1[s0>>16]
379
380	x	%r8,3(%r5,%r12)	# Te1[s1>>16]
381	l	%r9,0(%r9,%r12)	# Te0[s1>>24]
382	x	%r2,1(%r6,%r12)	# Te3[s1>>0]
383	x	%r3,2(%r7,%r12)	# Te2[s1>>8]
384
385	srlg	%r5,%r10,5	# i0
386	srlg	%r6,%r10,13	# i1
387	nr	%r5,%r0
388	nr	%r6,%r0
389	sllg	%r7,%r10,3
390	srl	%r10,21
391	nr	%r10,%r0
392	ngr	%r7,%r0
393
394	xr	%r9,%r1
395	srlg	%r14,%r11,5	# i1
396	sllg	%r1,%r11,3	# i0
397	nr	%r14,%r0
398	la	%r4,16(%r4)
399	ngr	%r1,%r0
400
401	x	%r8,2(%r5,%r12)	# Te2[s2>>8]
402	x	%r9,3(%r6,%r12)	# Te1[s2>>16]
403	l	%r10,0(%r10,%r12)	# Te0[s2>>24]
404	x	%r3,1(%r7,%r12)	# Te3[s2>>0]
405
406	srlg	%r7,%r11,13	# i2
407	xr	%r10,%r2
408	srl	%r11,21
409	nr	%r7,%r0
410	nr	%r11,%r0
411
412	x	%r8,0(%r4)
413	x	%r9,4(%r4)
414	x	%r10,8(%r4)
415	x	%r3,12(%r4)
416
417	x	%r8,1(%r1,%r12)	# Te3[s3>>0]
418	x	%r9,2(%r14,%r12)	# Te2[s3>>8]
419	x	%r10,3(%r7,%r12)	# Te1[s3>>16]
420	l	%r11,0(%r11,%r12)	# Te0[s3>>24]
421	xr	%r11,%r3
422
423	brct	%r13,.Lenc_loop
424	.align	16
425
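	# final round: the plain S-box byte S[x] sits at byte offsets 1 and 2
	# of every doubled Te0 entry ({02}S, S, S, {03}S), so the llgc loads
	# below with displacement 2 give the "Te4" value without a separate
	# table lookup.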
426	sllg	%r1,%r8,3
427	srlg	%r2,%r8,5
428	ngr	%r1,%r0
429	srlg	%r3,%r8,13
430	srl	%r8,21
431	nr	%r8,%r0
432	nr	%r2,%r0
433	nr	%r3,%r0
434
435	srlg	%r5,%r9,13	# i0
436	sllg	%r6,%r9,3
437	ngr	%r6,%r0
438	srlg	%r7,%r9,5
439	srl	%r9,21
440	nr	%r5,%r0
441	nr	%r9,%r0
442	nr	%r7,%r0
443
444	llgc	%r8,2(%r8,%r12)	# Te4[s0>>24]
445	llgc	%r1,2(%r1,%r12)	# Te4[s0>>0]
446	sll	%r8,24
447	llgc	%r2,2(%r2,%r12)	# Te4[s0>>8]
448	llgc	%r3,2(%r3,%r12)	# Te4[s0>>16]
449	sll	%r2,8
450	sll	%r3,16
451
452	llgc	%r5,2(%r5,%r12)	# Te4[s1>>16]
453	llgc	%r9,2(%r9,%r12)	# Te4[s1>>24]
454	llgc	%r6,2(%r6,%r12)	# Te4[s1>>0]
455	llgc	%r7,2(%r7,%r12)	# Te4[s1>>8]
456	sll	%r5,16
457	sll	%r9,24
458	sll	%r7,8
459	or	%r8,%r5
460	or	%r9,%r1
461	or	%r2,%r6
462	or	%r3,%r7
463
464	srlg	%r5,%r10,5	# i0
465	srlg	%r6,%r10,13	# i1
466	nr	%r5,%r0
467	nr	%r6,%r0
468	sllg	%r7,%r10,3
469	srl	%r10,21
470	ngr	%r7,%r0
471	nr	%r10,%r0
472
473	sllg	%r1,%r11,3	# i0
474	srlg	%r14,%r11,5	# i1
475	ngr	%r1,%r0
476
477	llgc	%r5,2(%r5,%r12)	# Te4[s2>>8]
478	llgc	%r6,2(%r6,%r12)	# Te4[s2>>16]
479	sll	%r5,8
480	llgc	%r10,2(%r10,%r12)	# Te4[s2>>24]
481	llgc	%r7,2(%r7,%r12)	# Te4[s2>>0]
482	sll	%r6,16
483	nr	%r14,%r0
484	sll	%r10,24
485	or	%r8,%r5
486	or	%r9,%r6
487	or	%r10,%r2
488	or	%r3,%r7
489
490	srlg	%r7,%r11,13	# i2
491	srl	%r11,21
492	nr	%r7,%r0
493	nr	%r11,%r0
494
495	l	%r0,16(%r4)
496	l	%r2,20(%r4)
497
498	llgc	%r5,2(%r1,%r12)	# Te4[s3>>0]
499	llgc	%r6,2(%r14,%r12)	# Te4[s3>>8]
500	llgc	%r7,2(%r7,%r12)	# Te4[s3>>16]
501	llgc	%r11,2(%r11,%r12)	# Te4[s3>>24]
502	sll	%r6,8
503	sll	%r7,16
504	sll	%r11,24
505	or	%r8,%r5
506	or	%r9,%r6
507	or	%r10,%r7
508	or	%r11,%r3
509
510	lg	%r14,15*8(%r15)
511	xr	%r8,%r0
512	xr	%r9,%r2
513	x	%r10,24(%r4)
514	x	%r11,28(%r4)
515
516	br	%r14
517.size	_s390x_AES_encrypt,.-_s390x_AES_encrypt
518.type	AES_Td,@object
519.align	256
520AES_Td:
521.long	0x51f4a750,0x51f4a750
522.long	0x7e416553,0x7e416553
523.long	0x1a17a4c3,0x1a17a4c3
524.long	0x3a275e96,0x3a275e96
525.long	0x3bab6bcb,0x3bab6bcb
526.long	0x1f9d45f1,0x1f9d45f1
527.long	0xacfa58ab,0xacfa58ab
528.long	0x4be30393,0x4be30393
529.long	0x2030fa55,0x2030fa55
530.long	0xad766df6,0xad766df6
531.long	0x88cc7691,0x88cc7691
532.long	0xf5024c25,0xf5024c25
533.long	0x4fe5d7fc,0x4fe5d7fc
534.long	0xc52acbd7,0xc52acbd7
535.long	0x26354480,0x26354480
536.long	0xb562a38f,0xb562a38f
537.long	0xdeb15a49,0xdeb15a49
538.long	0x25ba1b67,0x25ba1b67
539.long	0x45ea0e98,0x45ea0e98
540.long	0x5dfec0e1,0x5dfec0e1
541.long	0xc32f7502,0xc32f7502
542.long	0x814cf012,0x814cf012
543.long	0x8d4697a3,0x8d4697a3
544.long	0x6bd3f9c6,0x6bd3f9c6
545.long	0x038f5fe7,0x038f5fe7
546.long	0x15929c95,0x15929c95
547.long	0xbf6d7aeb,0xbf6d7aeb
548.long	0x955259da,0x955259da
549.long	0xd4be832d,0xd4be832d
550.long	0x587421d3,0x587421d3
551.long	0x49e06929,0x49e06929
552.long	0x8ec9c844,0x8ec9c844
553.long	0x75c2896a,0x75c2896a
554.long	0xf48e7978,0xf48e7978
555.long	0x99583e6b,0x99583e6b
556.long	0x27b971dd,0x27b971dd
557.long	0xbee14fb6,0xbee14fb6
558.long	0xf088ad17,0xf088ad17
559.long	0xc920ac66,0xc920ac66
560.long	0x7dce3ab4,0x7dce3ab4
561.long	0x63df4a18,0x63df4a18
562.long	0xe51a3182,0xe51a3182
563.long	0x97513360,0x97513360
564.long	0x62537f45,0x62537f45
565.long	0xb16477e0,0xb16477e0
566.long	0xbb6bae84,0xbb6bae84
567.long	0xfe81a01c,0xfe81a01c
568.long	0xf9082b94,0xf9082b94
569.long	0x70486858,0x70486858
570.long	0x8f45fd19,0x8f45fd19
571.long	0x94de6c87,0x94de6c87
572.long	0x527bf8b7,0x527bf8b7
573.long	0xab73d323,0xab73d323
574.long	0x724b02e2,0x724b02e2
575.long	0xe31f8f57,0xe31f8f57
576.long	0x6655ab2a,0x6655ab2a
577.long	0xb2eb2807,0xb2eb2807
578.long	0x2fb5c203,0x2fb5c203
579.long	0x86c57b9a,0x86c57b9a
580.long	0xd33708a5,0xd33708a5
581.long	0x302887f2,0x302887f2
582.long	0x23bfa5b2,0x23bfa5b2
583.long	0x02036aba,0x02036aba
584.long	0xed16825c,0xed16825c
585.long	0x8acf1c2b,0x8acf1c2b
586.long	0xa779b492,0xa779b492
587.long	0xf307f2f0,0xf307f2f0
588.long	0x4e69e2a1,0x4e69e2a1
589.long	0x65daf4cd,0x65daf4cd
590.long	0x0605bed5,0x0605bed5
591.long	0xd134621f,0xd134621f
592.long	0xc4a6fe8a,0xc4a6fe8a
593.long	0x342e539d,0x342e539d
594.long	0xa2f355a0,0xa2f355a0
595.long	0x058ae132,0x058ae132
596.long	0xa4f6eb75,0xa4f6eb75
597.long	0x0b83ec39,0x0b83ec39
598.long	0x4060efaa,0x4060efaa
599.long	0x5e719f06,0x5e719f06
600.long	0xbd6e1051,0xbd6e1051
601.long	0x3e218af9,0x3e218af9
602.long	0x96dd063d,0x96dd063d
603.long	0xdd3e05ae,0xdd3e05ae
604.long	0x4de6bd46,0x4de6bd46
605.long	0x91548db5,0x91548db5
606.long	0x71c45d05,0x71c45d05
607.long	0x0406d46f,0x0406d46f
608.long	0x605015ff,0x605015ff
609.long	0x1998fb24,0x1998fb24
610.long	0xd6bde997,0xd6bde997
611.long	0x894043cc,0x894043cc
612.long	0x67d99e77,0x67d99e77
613.long	0xb0e842bd,0xb0e842bd
614.long	0x07898b88,0x07898b88
615.long	0xe7195b38,0xe7195b38
616.long	0x79c8eedb,0x79c8eedb
617.long	0xa17c0a47,0xa17c0a47
618.long	0x7c420fe9,0x7c420fe9
619.long	0xf8841ec9,0xf8841ec9
620.long	0x00000000,0x00000000
621.long	0x09808683,0x09808683
622.long	0x322bed48,0x322bed48
623.long	0x1e1170ac,0x1e1170ac
624.long	0x6c5a724e,0x6c5a724e
625.long	0xfd0efffb,0xfd0efffb
626.long	0x0f853856,0x0f853856
627.long	0x3daed51e,0x3daed51e
628.long	0x362d3927,0x362d3927
629.long	0x0a0fd964,0x0a0fd964
630.long	0x685ca621,0x685ca621
631.long	0x9b5b54d1,0x9b5b54d1
632.long	0x24362e3a,0x24362e3a
633.long	0x0c0a67b1,0x0c0a67b1
634.long	0x9357e70f,0x9357e70f
635.long	0xb4ee96d2,0xb4ee96d2
636.long	0x1b9b919e,0x1b9b919e
637.long	0x80c0c54f,0x80c0c54f
638.long	0x61dc20a2,0x61dc20a2
639.long	0x5a774b69,0x5a774b69
640.long	0x1c121a16,0x1c121a16
641.long	0xe293ba0a,0xe293ba0a
642.long	0xc0a02ae5,0xc0a02ae5
643.long	0x3c22e043,0x3c22e043
644.long	0x121b171d,0x121b171d
645.long	0x0e090d0b,0x0e090d0b
646.long	0xf28bc7ad,0xf28bc7ad
647.long	0x2db6a8b9,0x2db6a8b9
648.long	0x141ea9c8,0x141ea9c8
649.long	0x57f11985,0x57f11985
650.long	0xaf75074c,0xaf75074c
651.long	0xee99ddbb,0xee99ddbb
652.long	0xa37f60fd,0xa37f60fd
653.long	0xf701269f,0xf701269f
654.long	0x5c72f5bc,0x5c72f5bc
655.long	0x44663bc5,0x44663bc5
656.long	0x5bfb7e34,0x5bfb7e34
657.long	0x8b432976,0x8b432976
658.long	0xcb23c6dc,0xcb23c6dc
659.long	0xb6edfc68,0xb6edfc68
660.long	0xb8e4f163,0xb8e4f163
661.long	0xd731dcca,0xd731dcca
662.long	0x42638510,0x42638510
663.long	0x13972240,0x13972240
664.long	0x84c61120,0x84c61120
665.long	0x854a247d,0x854a247d
666.long	0xd2bb3df8,0xd2bb3df8
667.long	0xaef93211,0xaef93211
668.long	0xc729a16d,0xc729a16d
669.long	0x1d9e2f4b,0x1d9e2f4b
670.long	0xdcb230f3,0xdcb230f3
671.long	0x0d8652ec,0x0d8652ec
672.long	0x77c1e3d0,0x77c1e3d0
673.long	0x2bb3166c,0x2bb3166c
674.long	0xa970b999,0xa970b999
675.long	0x119448fa,0x119448fa
676.long	0x47e96422,0x47e96422
677.long	0xa8fc8cc4,0xa8fc8cc4
678.long	0xa0f03f1a,0xa0f03f1a
679.long	0x567d2cd8,0x567d2cd8
680.long	0x223390ef,0x223390ef
681.long	0x87494ec7,0x87494ec7
682.long	0xd938d1c1,0xd938d1c1
683.long	0x8ccaa2fe,0x8ccaa2fe
684.long	0x98d40b36,0x98d40b36
685.long	0xa6f581cf,0xa6f581cf
686.long	0xa57ade28,0xa57ade28
687.long	0xdab78e26,0xdab78e26
688.long	0x3fadbfa4,0x3fadbfa4
689.long	0x2c3a9de4,0x2c3a9de4
690.long	0x5078920d,0x5078920d
691.long	0x6a5fcc9b,0x6a5fcc9b
692.long	0x547e4662,0x547e4662
693.long	0xf68d13c2,0xf68d13c2
694.long	0x90d8b8e8,0x90d8b8e8
695.long	0x2e39f75e,0x2e39f75e
696.long	0x82c3aff5,0x82c3aff5
697.long	0x9f5d80be,0x9f5d80be
698.long	0x69d0937c,0x69d0937c
699.long	0x6fd52da9,0x6fd52da9
700.long	0xcf2512b3,0xcf2512b3
701.long	0xc8ac993b,0xc8ac993b
702.long	0x10187da7,0x10187da7
703.long	0xe89c636e,0xe89c636e
704.long	0xdb3bbb7b,0xdb3bbb7b
705.long	0xcd267809,0xcd267809
706.long	0x6e5918f4,0x6e5918f4
707.long	0xec9ab701,0xec9ab701
708.long	0x834f9aa8,0x834f9aa8
709.long	0xe6956e65,0xe6956e65
710.long	0xaaffe67e,0xaaffe67e
711.long	0x21bccf08,0x21bccf08
712.long	0xef15e8e6,0xef15e8e6
713.long	0xbae79bd9,0xbae79bd9
714.long	0x4a6f36ce,0x4a6f36ce
715.long	0xea9f09d4,0xea9f09d4
716.long	0x29b07cd6,0x29b07cd6
717.long	0x31a4b2af,0x31a4b2af
718.long	0x2a3f2331,0x2a3f2331
719.long	0xc6a59430,0xc6a59430
720.long	0x35a266c0,0x35a266c0
721.long	0x744ebc37,0x744ebc37
722.long	0xfc82caa6,0xfc82caa6
723.long	0xe090d0b0,0xe090d0b0
724.long	0x33a7d815,0x33a7d815
725.long	0xf104984a,0xf104984a
726.long	0x41ecdaf7,0x41ecdaf7
727.long	0x7fcd500e,0x7fcd500e
728.long	0x1791f62f,0x1791f62f
729.long	0x764dd68d,0x764dd68d
730.long	0x43efb04d,0x43efb04d
731.long	0xccaa4d54,0xccaa4d54
732.long	0xe49604df,0xe49604df
733.long	0x9ed1b5e3,0x9ed1b5e3
734.long	0x4c6a881b,0x4c6a881b
735.long	0xc12c1fb8,0xc12c1fb8
736.long	0x4665517f,0x4665517f
737.long	0x9d5eea04,0x9d5eea04
738.long	0x018c355d,0x018c355d
739.long	0xfa877473,0xfa877473
740.long	0xfb0b412e,0xfb0b412e
741.long	0xb3671d5a,0xb3671d5a
742.long	0x92dbd252,0x92dbd252
743.long	0xe9105633,0xe9105633
744.long	0x6dd64713,0x6dd64713
745.long	0x9ad7618c,0x9ad7618c
746.long	0x37a10c7a,0x37a10c7a
747.long	0x59f8148e,0x59f8148e
748.long	0xeb133c89,0xeb133c89
749.long	0xcea927ee,0xcea927ee
750.long	0xb761c935,0xb761c935
751.long	0xe11ce5ed,0xe11ce5ed
752.long	0x7a47b13c,0x7a47b13c
753.long	0x9cd2df59,0x9cd2df59
754.long	0x55f2733f,0x55f2733f
755.long	0x1814ce79,0x1814ce79
756.long	0x73c737bf,0x73c737bf
757.long	0x53f7cdea,0x53f7cdea
758.long	0x5ffdaa5b,0x5ffdaa5b
759.long	0xdf3d6f14,0xdf3d6f14
760.long	0x7844db86,0x7844db86
761.long	0xcaaff381,0xcaaff381
762.long	0xb968c43e,0xb968c43e
763.long	0x3824342c,0x3824342c
764.long	0xc2a3405f,0xc2a3405f
765.long	0x161dc372,0x161dc372
766.long	0xbce2250c,0xbce2250c
767.long	0x283c498b,0x283c498b
768.long	0xff0d9541,0xff0d9541
769.long	0x39a80171,0x39a80171
770.long	0x080cb3de,0x080cb3de
771.long	0xd8b4e49c,0xd8b4e49c
772.long	0x6456c190,0x6456c190
773.long	0x7bcb8461,0x7bcb8461
774.long	0xd532b670,0xd532b670
775.long	0x486c5c74,0x486c5c74
776.long	0xd0b85742,0xd0b85742
777# Td4[256]
778.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
779.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
780.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
781.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
782.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
783.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
784.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
785.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
786.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
787.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
788.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
789.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
790.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
791.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
792.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
793.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
794.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
795.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
796.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
797.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
798.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
799.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
800.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
801.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
802.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
803.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
804.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
805.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
806.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
807.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
808.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
809.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
810.size	AES_Td,.-AES_Td
811
812# void AES_decrypt(const unsigned char *inp, unsigned char *out,
813# 		 const AES_KEY *key) {
814.globl	AES_decrypt
815.type	AES_decrypt,@function
816AES_decrypt:
817	l	%r0,240(%r4)
818	lhi	%r1,16
819	clr	%r0,%r1
820	jl	.Ldsoft
821
822	la	%r1,0(%r4)
823	#la	%r2,0(%r2)
824	la	%r4,0(%r3)
825	lghi	%r3,16		# single block length
826	.long	0xb92e0042	# km %r4,%r2
827	brc	1,.-4		# can this happen?
828	br	%r14
829.align	64
830.Ldsoft:
831	stmg	%r3,%r14,3*8(%r15)
832
833	llgf	%r8,0(%r2)
834	llgf	%r9,4(%r2)
835	llgf	%r10,8(%r2)
836	llgf	%r11,12(%r2)
837
838	larl	%r12,AES_Td
839	bras	%r14,_s390x_AES_decrypt
840
841	lg	%r3,3*8(%r15)
842	st	%r8,0(%r3)
843	st	%r9,4(%r3)
844	st	%r10,8(%r3)
845	st	%r11,12(%r3)
846
847	lmg	%r6,%r14,6*8(%r15)
848	br	%r14
849.size	AES_decrypt,.-AES_decrypt
850
851.type   _s390x_AES_decrypt,@function
852.align	16
853_s390x_AES_decrypt:
854	stg	%r14,15*8(%r15)
855	x	%r8,0(%r4)
856	x	%r9,4(%r4)
857	x	%r10,8(%r4)
858	x	%r11,12(%r4)
859	l	%r13,240(%r4)
860	llill	%r0,2040
861	aghi	%r13,-1
862	j	.Ldec_loop
863.align	16
864.Ldec_loop:
865	srlg	%r1,%r8,13
866	srlg	%r2,%r8,5
867	sllg	%r3,%r8,3
868	srl	%r8,21
869	nr	%r8,%r0
870	nr	%r1,%r0
871	nr	%r2,%r0
872	ngr	%r3,%r0
873
874	sllg	%r5,%r9,3	# i0
875	srlg	%r6,%r9,13
876	srlg	%r7,%r9,5
877	srl	%r9,21
878	ngr	%r5,%r0
879	nr	%r9,%r0
880	nr	%r6,%r0
881	nr	%r7,%r0
882
883	l	%r8,0(%r8,%r12)	# Td0[s0>>24]
884	l	%r1,3(%r1,%r12)	# Td1[s0>>16]
885	l	%r2,2(%r2,%r12)	# Td2[s0>>8]
886	l	%r3,1(%r3,%r12)	# Td3[s0>>0]
887
888	x	%r8,1(%r5,%r12)	# Td3[s1>>0]
889	l	%r9,0(%r9,%r12)	# Td0[s1>>24]
890	x	%r2,3(%r6,%r12)	# Td1[s1>>16]
891	x	%r3,2(%r7,%r12)	# Td2[s1>>8]
892
893	srlg	%r5,%r10,5	# i0
894	sllg	%r6,%r10,3	# i1
895	srlg	%r7,%r10,13
896	srl	%r10,21
897	nr	%r5,%r0
898	ngr	%r6,%r0
899	nr	%r10,%r0
900	nr	%r7,%r0
901
902	xr	%r9,%r1
903	srlg	%r14,%r11,5	# i1
904	srlg	%r1,%r11,13	# i0
905	nr	%r14,%r0
906	la	%r4,16(%r4)
907	nr	%r1,%r0
908
909	x	%r8,2(%r5,%r12)	# Td2[s2>>8]
910	x	%r9,1(%r6,%r12)	# Td3[s2>>0]
911	l	%r10,0(%r10,%r12)	# Td0[s2>>24]
912	x	%r3,3(%r7,%r12)	# Td1[s2>>16]
913
914	sllg	%r7,%r11,3	# i2
915	srl	%r11,21
916	ngr	%r7,%r0
917	nr	%r11,%r0
918
919	xr	%r10,%r2
920	x	%r8,0(%r4)
921	x	%r9,4(%r4)
922	x	%r10,8(%r4)
923	x	%r3,12(%r4)
924
925	x	%r8,3(%r1,%r12)	# Td1[s3>>16]
926	x	%r9,2(%r14,%r12)	# Td2[s3>>8]
927	x	%r10,1(%r7,%r12)	# Td3[s3>>0]
928	l	%r11,0(%r11,%r12)	# Td0[s3>>24]
929	xr	%r11,%r3
930
931	brct	%r13,.Ldec_loop
932	.align	16
933
934	l	%r1,2048(%r12)	# prefetch Td4
935	l	%r2,2112(%r12)
936	l	%r3,2176(%r12)
937	l	%r5,2240(%r12)
938	llill	%r0,0xff
939
940	srlg	%r7,%r8,24	# i0
941	srlg	%r1,%r8,16
942	srlg	%r2,%r8,8
943	nr	%r8,%r0	# i3
944	nr	%r1,%r0
945
946	srlg	%r5,%r9,24
947	nr	%r2,%r0
948	srlg	%r6,%r9,16
949	srlg	%r14,%r9,8
950	nr	%r9,%r0	# i0
951	nr	%r6,%r0
952	nr	%r14,%r0
953
954	llgc	%r7,2048(%r7,%r12)	# Td4[s0>>24]
955	llgc	%r1,2048(%r1,%r12)	# Td4[s0>>16]
956	llgc	%r2,2048(%r2,%r12)	# Td4[s0>>8]
957	sll	%r1,16
958	llgc	%r3,2048(%r8,%r12)	# Td4[s0>>0]
959	sllg	%r8,%r7,24
960	sll	%r2,8
961
962	llgc	%r9,2048(%r9,%r12)	# Td4[s1>>0]
963	llgc	%r5,2048(%r5,%r12)	# Td4[s1>>24]
964	llgc	%r6,2048(%r6,%r12)	# Td4[s1>>16]
965	sll	%r5,24
966	llgc	%r7,2048(%r14,%r12)	# Td4[s1>>8]
967	sll	%r6,16
968	sll	%r7,8
969	or	%r8,%r9
970	or	%r1,%r5
971	or	%r2,%r6
972	or	%r3,%r7
973
974	srlg	%r5,%r10,8	# i0
975	srlg	%r6,%r10,24
976	srlg	%r7,%r10,16
977	nr	%r10,%r0	# i1
978	nr	%r5,%r0
979	nr	%r7,%r0
980	llgc	%r5,2048(%r5,%r12)	# Td4[s2>>8]
981	llgc	%r9,2048(%r10,%r12)	# Td4[s2>>0]
982	llgc	%r6,2048(%r6,%r12)	# Td4[s2>>24]
983	llgc	%r7,2048(%r7,%r12)	# Td4[s2>>16]
984	sll	%r5,8
985	sll	%r6,24
986	or	%r8,%r5
987	sll	%r7,16
988	or	%r2,%r6
989	or	%r3,%r7
990
991	srlg	%r5,%r11,16	# i0
992	srlg	%r6,%r11,8	# i1
993	srlg	%r7,%r11,24
994	nr	%r11,%r0	# i2
995	nr	%r5,%r0
996	nr	%r6,%r0
997
998	lg	%r14,15*8(%r15)
999	or	%r9,%r1
1000	l	%r0,16(%r4)
1001	l	%r1,20(%r4)
1002
1003	llgc	%r5,2048(%r5,%r12)	# Td4[s3>>16]
1004	llgc	%r6,2048(%r6,%r12)	# Td4[s3>>8]
1005	sll	%r5,16
1006	llgc	%r10,2048(%r11,%r12)	# Td4[s3>>0]
1007	llgc	%r11,2048(%r7,%r12)	# Td4[s3>>24]
1008	sll	%r6,8
1009	sll	%r11,24
1010	or	%r8,%r5
1011	or	%r9,%r6
1012	or	%r10,%r2
1013	or	%r11,%r3
1014
1015	xr	%r8,%r0
1016	xr	%r9,%r1
1017	x	%r10,24(%r4)
1018	x	%r11,28(%r4)
1019
1020	br	%r14
1021.size	_s390x_AES_decrypt,.-_s390x_AES_decrypt
1022# void AES_set_encrypt_key(const unsigned char *in, int bits,
1023# 		 AES_KEY *key) {
1024.globl	AES_set_encrypt_key
1025.type	AES_set_encrypt_key,@function
1026.align	16
1027AES_set_encrypt_key:
1028_s390x_AES_set_encrypt_key:
1029	lghi	%r0,0
1030	clgr	%r2,%r0
1031	je	.Lminus1
1032	clgr	%r4,%r0
1033	je	.Lminus1
1034
1035	lghi	%r0,128
1036	clr	%r3,%r0
1037	je	.Lproceed
1038	lghi	%r0,192
1039	clr	%r3,%r0
1040	je	.Lproceed
1041	lghi	%r0,256
1042	clr	%r3,%r0
1043	je	.Lproceed
1044	lghi	%r2,-2
1045	br	%r14
1046
1047.align	16
1048.Lproceed:
1049	# convert bits to km(c) code, [128,192,256]->[18,19,20]
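	# e.g. bits=128: ((128-128)>>6)+18 = 18; 192 -> 19; 256 -> 20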
1050	lhi	%r5,-128
1051	lhi	%r0,18
1052	ar	%r5,%r3
1053	srl	%r5,6
1054	ar	%r5,%r0
1055
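	# build a doubleword with only bit <function code> set and AND it
	# with the CPACF status words: bit n of a status word is set when
	# function code n is installed, so both km and kmc must support
	# this key length for the hardware path to be taken.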
1056	larl	%r1,OPENSSL_s390xcap_P
1057	llihh	%r0,0x8000
1058	srlg	%r0,%r0,0(%r5)
1059	ng	%r0,S390X_KM(%r1)  # check availability of both km...
1060	ng	%r0,S390X_KMC(%r1) # ...and kmc support for given key length
1061	jz	.Lekey_internal
1062
1063	lmg	%r0,%r1,0(%r2)	# just copy 128 bits...
1064	stmg	%r0,%r1,0(%r4)
1065	lhi	%r0,192
1066	cr	%r3,%r0
1067	jl	1f
1068	lg	%r1,16(%r2)
1069	stg	%r1,16(%r4)
1070	je	1f
1071	lg	%r1,24(%r2)
1072	stg	%r1,24(%r4)
10731:	st	%r3,236(%r4)	# save bits [for debugging purposes]
1074	lgr	%r0,%r5
1075	st	%r5,240(%r4)	# save km(c) code
1076	lghi	%r2,0
1077	br	%r14
1078.align	16
1079.Lekey_internal:
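	# software key expansion; %r12 points at the S-box bytes
	# (AES_Te+2048), and the rcon words sit another 256 bytes further
	# on, which is what the 256(%r3,%r12) references below pick up.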
1080	stmg	%r4,%r13,4*8(%r15)	# all non-volatile regs and %r4
1081
1082	larl	%r12,AES_Te+2048
1083
1084	llgf	%r8,0(%r2)
1085	llgf	%r9,4(%r2)
1086	llgf	%r10,8(%r2)
1087	llgf	%r11,12(%r2)
1088	st	%r8,0(%r4)
1089	st	%r9,4(%r4)
1090	st	%r10,8(%r4)
1091	st	%r11,12(%r4)
1092	lghi	%r0,128
1093	cr	%r3,%r0
1094	jne	.Lnot128
1095
1096	llill	%r0,0xff
1097	lghi	%r3,0			# i=0
1098	lghi	%r13,10
1099	st	%r13,240(%r4)
1100
1101	llgfr	%r2,%r11			# temp=rk[3]
1102	srlg	%r5,%r11,8
1103	srlg	%r6,%r11,16
1104	srlg	%r7,%r11,24
1105	nr	%r2,%r0
1106	nr	%r5,%r0
1107	nr	%r6,%r0
1108
1109.align	16
1110.L128_loop:
1111	la	%r2,0(%r2,%r12)
1112	la	%r5,0(%r5,%r12)
1113	la	%r6,0(%r6,%r12)
1114	la	%r7,0(%r7,%r12)
1115	icm	%r2,2,0(%r2)		# Te4[rk[3]>>0]<<8
1116	icm	%r2,4,0(%r5)		# Te4[rk[3]>>8]<<16
1117	icm	%r2,8,0(%r6)		# Te4[rk[3]>>16]<<24
1118	icm	%r2,1,0(%r7)		# Te4[rk[3]>>24]
1119	x	%r2,256(%r3,%r12)	# rcon[i]
1120	xr	%r8,%r2			# rk[4]=rk[0]^...
1121	xr	%r9,%r8			# rk[5]=rk[1]^rk[4]
1122	xr	%r10,%r9			# rk[6]=rk[2]^rk[5]
1123	xr	%r11,%r10			# rk[7]=rk[3]^rk[6]
1124
1125	llgfr	%r2,%r11			# temp=rk[3]
1126	srlg	%r5,%r11,8
1127	srlg	%r6,%r11,16
1128	nr	%r2,%r0
1129	nr	%r5,%r0
1130	srlg	%r7,%r11,24
1131	nr	%r6,%r0
1132
1133	st	%r8,16(%r4)
1134	st	%r9,20(%r4)
1135	st	%r10,24(%r4)
1136	st	%r11,28(%r4)
1137	la	%r4,16(%r4)		# key+=4
1138	la	%r3,4(%r3)		# i++
1139	brct	%r13,.L128_loop
1140	lghi	%r0,10
1141	lghi	%r2,0
1142	lmg	%r4,%r13,4*8(%r15)
1143	br	%r14
1144
1145.align	16
1146.Lnot128:
1147	llgf	%r0,16(%r2)
1148	llgf	%r1,20(%r2)
1149	st	%r0,16(%r4)
1150	st	%r1,20(%r4)
1151	lghi	%r0,192
1152	cr	%r3,%r0
1153	jne	.Lnot192
1154
1155	llill	%r0,0xff
1156	lghi	%r3,0			# i=0
1157	lghi	%r13,12
1158	st	%r13,240(%r4)
1159	lghi	%r13,8
1160
1161	srlg	%r5,%r1,8
1162	srlg	%r6,%r1,16
1163	srlg	%r7,%r1,24
1164	nr	%r1,%r0
1165	nr	%r5,%r0
1166	nr	%r6,%r0
1167
1168.align	16
1169.L192_loop:
1170	la	%r1,0(%r1,%r12)
1171	la	%r5,0(%r5,%r12)
1172	la	%r6,0(%r6,%r12)
1173	la	%r7,0(%r7,%r12)
1174	icm	%r1,2,0(%r1)		# Te4[rk[5]>>0]<<8
1175	icm	%r1,4,0(%r5)		# Te4[rk[5]>>8]<<16
1176	icm	%r1,8,0(%r6)		# Te4[rk[5]>>16]<<24
1177	icm	%r1,1,0(%r7)		# Te4[rk[5]>>24]
1178	x	%r1,256(%r3,%r12)	# rcon[i]
1179	xr	%r8,%r1			# rk[6]=rk[0]^...
1180	xr	%r9,%r8			# rk[7]=rk[1]^rk[6]
1181	xr	%r10,%r9			# rk[8]=rk[2]^rk[7]
1182	xr	%r11,%r10			# rk[9]=rk[3]^rk[8]
1183
1184	st	%r8,24(%r4)
1185	st	%r9,28(%r4)
1186	st	%r10,32(%r4)
1187	st	%r11,36(%r4)
1188	brct	%r13,.L192_continue
1189	lghi	%r0,12
1190	lghi	%r2,0
1191	lmg	%r4,%r13,4*8(%r15)
1192	br	%r14
1193
1194.align	16
1195.L192_continue:
1196	lgr	%r1,%r11
1197	x	%r1,16(%r4)		# rk[10]=rk[4]^rk[9]
1198	st	%r1,40(%r4)
1199	x	%r1,20(%r4)		# rk[11]=rk[5]^rk[10]
1200	st	%r1,44(%r4)
1201
1202	srlg	%r5,%r1,8
1203	srlg	%r6,%r1,16
1204	srlg	%r7,%r1,24
1205	nr	%r1,%r0
1206	nr	%r5,%r0
1207	nr	%r6,%r0
1208
1209	la	%r4,24(%r4)		# key+=6
1210	la	%r3,4(%r3)		# i++
1211	j	.L192_loop
1212
1213.align	16
1214.Lnot192:
1215	llgf	%r0,24(%r2)
1216	llgf	%r1,28(%r2)
1217	st	%r0,24(%r4)
1218	st	%r1,28(%r4)
1219	llill	%r0,0xff
1220	lghi	%r3,0			# i=0
1221	lghi	%r13,14
1222	st	%r13,240(%r4)
1223	lghi	%r13,7
1224
1225	srlg	%r5,%r1,8
1226	srlg	%r6,%r1,16
1227	srlg	%r7,%r1,24
1228	nr	%r1,%r0
1229	nr	%r5,%r0
1230	nr	%r6,%r0
1231
1232.align	16
1233.L256_loop:
1234	la	%r1,0(%r1,%r12)
1235	la	%r5,0(%r5,%r12)
1236	la	%r6,0(%r6,%r12)
1237	la	%r7,0(%r7,%r12)
1238	icm	%r1,2,0(%r1)		# Te4[rk[7]>>0]<<8
1239	icm	%r1,4,0(%r5)		# Te4[rk[7]>>8]<<16
1240	icm	%r1,8,0(%r6)		# Te4[rk[7]>>16]<<24
1241	icm	%r1,1,0(%r7)		# Te4[rk[7]>>24]
1242	x	%r1,256(%r3,%r12)	# rcon[i]
1243	xr	%r8,%r1			# rk[8]=rk[0]^...
1244	xr	%r9,%r8			# rk[9]=rk[1]^rk[8]
1245	xr	%r10,%r9			# rk[10]=rk[2]^rk[9]
1246	xr	%r11,%r10			# rk[11]=rk[3]^rk[10]
1247	st	%r8,32(%r4)
1248	st	%r9,36(%r4)
1249	st	%r10,40(%r4)
1250	st	%r11,44(%r4)
1251	brct	%r13,.L256_continue
1252	lghi	%r0,14
1253	lghi	%r2,0
1254	lmg	%r4,%r13,4*8(%r15)
1255	br	%r14
1256
1257.align	16
1258.L256_continue:
1259	lgr	%r1,%r11			# temp=rk[11]
1260	srlg	%r5,%r11,8
1261	srlg	%r6,%r11,16
1262	srlg	%r7,%r11,24
1263	nr	%r1,%r0
1264	nr	%r5,%r0
1265	nr	%r6,%r0
1266	la	%r1,0(%r1,%r12)
1267	la	%r5,0(%r5,%r12)
1268	la	%r6,0(%r6,%r12)
1269	la	%r7,0(%r7,%r12)
1270	llgc	%r1,0(%r1)		# Te4[rk[11]>>0]
1271	icm	%r1,2,0(%r5)		# Te4[rk[11]>>8]<<8
1272	icm	%r1,4,0(%r6)		# Te4[rk[11]>>16]<<16
1273	icm	%r1,8,0(%r7)		# Te4[rk[11]>>24]<<24
1274	x	%r1,16(%r4)		# rk[12]=rk[4]^...
1275	st	%r1,48(%r4)
1276	x	%r1,20(%r4)		# rk[13]=rk[5]^rk[12]
1277	st	%r1,52(%r4)
1278	x	%r1,24(%r4)		# rk[14]=rk[6]^rk[13]
1279	st	%r1,56(%r4)
1280	x	%r1,28(%r4)		# rk[15]=rk[7]^rk[14]
1281	st	%r1,60(%r4)
1282
1283	srlg	%r5,%r1,8
1284	srlg	%r6,%r1,16
1285	srlg	%r7,%r1,24
1286	nr	%r1,%r0
1287	nr	%r5,%r0
1288	nr	%r6,%r0
1289
1290	la	%r4,32(%r4)		# key+=8
1291	la	%r3,4(%r3)		# i++
1292	j	.L256_loop
1293
1294.Lminus1:
1295	lghi	%r2,-1
1296	br	%r14
1297.size	AES_set_encrypt_key,.-AES_set_encrypt_key
1298
1299# void AES_set_decrypt_key(const unsigned char *in, int bits,
1300# 		 AES_KEY *key) {
1301.globl	AES_set_decrypt_key
1302.type	AES_set_decrypt_key,@function
1303.align	16
1304AES_set_decrypt_key:
1305	#stg	%r4,4*8(%r15)	# I rely on AES_set_encrypt_key to
1306	stg	%r14,14*8(%r15)	# save non-volatile registers and %r4!
1307	bras	%r14,_s390x_AES_set_encrypt_key
1308	#lg	%r4,4*8(%r15)
1309	lg	%r14,14*8(%r15)
1310	ltgr	%r2,%r2
1311	bnzr	%r14
1312	#l	%r0,240(%r4)
1313	lhi	%r1,16
1314	cr	%r0,%r1
1315	jl	.Lgo
1316	oill	%r0,S390X_DECRYPT	# set "decrypt" bit
1317	st	%r0,240(%r4)
1318	br	%r14
1319.align	16
1320.Lgo:	lgr	%r13,%r0	#llgf	%r13,240(%r4)
1321	la	%r5,0(%r4)
1322	sllg	%r6,%r13,4
1323	la	%r6,0(%r6,%r4)
1324	srl	%r13,1
1325	lghi	%r1,-16
1326
1327.align	16
1328.Linv:	lmg	%r8,%r9,0(%r5)
1329	lmg	%r10,%r11,0(%r6)
1330	stmg	%r8,%r9,0(%r6)
1331	stmg	%r10,%r11,0(%r5)
1332	la	%r5,16(%r5)
1333	la	%r6,0(%r1,%r6)
1334	brct	%r13,.Linv
1335	llgf	%r13,240(%r4)
1336	aghi	%r13,-1
1337	sll	%r13,2	# (rounds-1)*4
1338	llilh	%r5,0x8080
1339	llilh	%r6,0x1b1b
1340	llilh	%r7,0xfefe
1341	oill	%r5,0x8080
1342	oill	%r6,0x1b1b
1343	oill	%r7,0xfefe
1344
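	# .Lmix converts the remaining encryption round keys for the
	# equivalent inverse cipher: each 32-bit word gets InvMixColumn
	# applied via three packed GF(2^8) doublings (tp2, tp4, tp8), four
	# bytes at a time, using the 0x80808080/0x1b1b1b1b/0xfefefefe masks
	# loaded above.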
1345.align	16
1346.Lmix:	l	%r8,16(%r4)	# tp1
1347	lr	%r9,%r8
1348	ngr	%r9,%r5
1349	srlg	%r1,%r9,7
1350	slr	%r9,%r1
1351	nr	%r9,%r6
1352	sllg	%r1,%r8,1
1353	nr	%r1,%r7
1354	xr	%r9,%r1		# tp2
1355
1356	lr	%r10,%r9
1357	ngr	%r10,%r5
1358	srlg	%r1,%r10,7
1359	slr	%r10,%r1
1360	nr	%r10,%r6
1361	sllg	%r1,%r9,1
1362	nr	%r1,%r7
1363	xr	%r10,%r1		# tp4
1364
1365	lr	%r11,%r10
1366	ngr	%r11,%r5
1367	srlg	%r1,%r11,7
1368	slr	%r11,%r1
1369	nr	%r11,%r6
1370	sllg	%r1,%r10,1
1371	nr	%r1,%r7
1372	xr	%r11,%r1		# tp8
1373
1374	xr	%r9,%r8		# tp2^tp1
1375	xr	%r10,%r8		# tp4^tp1
1376	rll	%r8,%r8,24	# = ROTATE(tp1,8)
1377	xr	%r10,%r11		# ^=tp8
1378	xr	%r8,%r9		# ^=tp2^tp1
1379	xr	%r9,%r11		# tp2^tp1^tp8
1380	xr	%r8,%r10		# ^=tp4^tp1^tp8
1381	rll	%r9,%r9,8
1382	rll	%r10,%r10,16
1383	xr	%r8,%r9		# ^= ROTATE(tp8^tp2^tp1,24)
1384	rll	%r11,%r11,24
1385	xr	%r8,%r10    	# ^= ROTATE(tp8^tp4^tp1,16)
1386	xr	%r8,%r11		# ^= ROTATE(tp8,8)
1387
1388	st	%r8,16(%r4)
1389	la	%r4,4(%r4)
1390	brct	%r13,.Lmix
1391
1392	lmg	%r6,%r13,6*8(%r15)# as was saved by AES_set_encrypt_key!
1393	lghi	%r2,0
1394	br	%r14
1395.size	AES_set_decrypt_key,.-AES_set_decrypt_key
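# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
# 		 size_t length, const AES_KEY *key,
# 		 unsigned char *ivec, const int enc) {
# The hardware path builds the KMC parameter block (chaining value
# followed by the key, 256 bits copied regardless of the actual key
# length) at 16(%r15); the direction comes from the S390X_DECRYPT bit
# that AES_set_decrypt_key leaves in the function code, so the enc
# argument (sixth parameter, passed on the stack) is only examined on
# the software path.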
1396.globl	AES_cbc_encrypt
1397.type	AES_cbc_encrypt,@function
1398.align	16
1399AES_cbc_encrypt:
1400	xgr	%r3,%r4		# flip %r3 and %r4, out and len
1401	xgr	%r4,%r3
1402	xgr	%r3,%r4
1403	lhi	%r0,16
1404	cl	%r0,240(%r5)
1405	jh	.Lcbc_software
1406
1407	lg	%r0,0(%r6)	# copy ivec
1408	lg	%r1,8(%r6)
1409	stmg	%r0,%r1,16(%r15)
1410	lmg	%r0,%r1,0(%r5)	# copy key, cover 256 bit
1411	stmg	%r0,%r1,32(%r15)
1412	lmg	%r0,%r1,16(%r5)
1413	stmg	%r0,%r1,48(%r15)
1414	l	%r0,240(%r5)	# load kmc code
1415	lghi	%r5,15		# res=len%16, len-=res;
1416	ngr	%r5,%r3
1417	slgr	%r3,%r5
1418	la	%r1,16(%r15)	# parameter block - ivec || key
1419	jz	.Lkmc_truncated
1420	.long	0xb92f0042	# kmc %r4,%r2
1421	brc	1,.-4		# pay attention to "partial completion"
1422	ltr	%r5,%r5
1423	jnz	.Lkmc_truncated
1424.Lkmc_done:
1425	lmg	%r0,%r1,16(%r15)	# copy ivec to caller
1426	stg	%r0,0(%r6)
1427	stg	%r1,8(%r6)
1428	br	%r14
1429.align	16
1430.Lkmc_truncated:
1431	ahi	%r5,-1		# it's the way it's encoded in mvc
1432	tmll	%r0,S390X_DECRYPT
1433	jnz	.Lkmc_truncated_dec
1434	lghi	%r1,0
1435	stg	%r1,16*8(%r15)
1436	stg	%r1,16*8+8(%r15)
1437	bras	%r1,1f
1438	mvc	16*8(1,%r15),0(%r2)
14391:	ex	%r5,0(%r1)
1440	la	%r1,16(%r15)	# restore parameter block
1441	la	%r2,16*8(%r15)
1442	lghi	%r3,16
1443	.long	0xb92f0042	# kmc %r4,%r2
1444	j	.Lkmc_done
1445.align	16
1446.Lkmc_truncated_dec:
1447	stg	%r4,4*8(%r15)
1448	la	%r4,16*8(%r15)
1449	lghi	%r3,16
1450	.long	0xb92f0042	# kmc %r4,%r2
1451	lg	%r4,4*8(%r15)
1452	bras	%r1,2f
1453	mvc	0(1,%r4),16*8(%r15)
14542:	ex	%r5,0(%r1)
1455	j	.Lkmc_done
1456.align	16
1457.Lcbc_software:
1458	stmg	%r5,%r14,5*8(%r15)
1459	lhi	%r0,0
1460	cl	%r0,164(%r15)
1461	je	.Lcbc_decrypt
1462
1463	larl	%r12,AES_Te
1464
1465	llgf	%r8,0(%r6)
1466	llgf	%r9,4(%r6)
1467	llgf	%r10,8(%r6)
1468	llgf	%r11,12(%r6)
1469
1470	lghi	%r0,16
1471	slgr	%r3,%r0
1472	brc	4,.Lcbc_enc_tail	# if borrow
1473.Lcbc_enc_loop:
1474	stmg	%r2,%r4,2*8(%r15)
1475	x	%r8,0(%r2)
1476	x	%r9,4(%r2)
1477	x	%r10,8(%r2)
1478	x	%r11,12(%r2)
1479	lgr	%r4,%r5
1480
1481	bras	%r14,_s390x_AES_encrypt
1482
1483	lmg	%r2,%r5,2*8(%r15)
1484	st	%r8,0(%r4)
1485	st	%r9,4(%r4)
1486	st	%r10,8(%r4)
1487	st	%r11,12(%r4)
1488
1489	la	%r2,16(%r2)
1490	la	%r4,16(%r4)
1491	lghi	%r0,16
1492	ltgr	%r3,%r3
1493	jz	.Lcbc_enc_done
1494	slgr	%r3,%r0
1495	brc	4,.Lcbc_enc_tail	# if borrow
1496	j	.Lcbc_enc_loop
1497.align	16
1498.Lcbc_enc_done:
1499	lg	%r6,6*8(%r15)
1500	st	%r8,0(%r6)
1501	st	%r9,4(%r6)
1502	st	%r10,8(%r6)
1503	st	%r11,12(%r6)
1504
1505	lmg	%r7,%r14,7*8(%r15)
1506	br	%r14
1507
1508.align	16
1509.Lcbc_enc_tail:
1510	aghi	%r3,15
1511	lghi	%r0,0
1512	stg	%r0,16*8(%r15)
1513	stg	%r0,16*8+8(%r15)
1514	bras	%r1,3f
1515	mvc	16*8(1,%r15),0(%r2)
15163:	ex	%r3,0(%r1)
1517	lghi	%r3,0
1518	la	%r2,16*8(%r15)
1519	j	.Lcbc_enc_loop
1520
1521.align	16
1522.Lcbc_decrypt:
1523	larl	%r12,AES_Td
1524
1525	lg	%r0,0(%r6)
1526	lg	%r1,8(%r6)
1527	stmg	%r0,%r1,16*8(%r15)
1528
1529.Lcbc_dec_loop:
1530	stmg	%r2,%r4,2*8(%r15)
1531	llgf	%r8,0(%r2)
1532	llgf	%r9,4(%r2)
1533	llgf	%r10,8(%r2)
1534	llgf	%r11,12(%r2)
1535	lgr	%r4,%r5
1536
1537	bras	%r14,_s390x_AES_decrypt
1538
1539	lmg	%r2,%r5,2*8(%r15)
1540	sllg	%r8,%r8,32
1541	sllg	%r10,%r10,32
1542	lr	%r8,%r9
	lr	%r10,%r11
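	# %r8 and %r10 now hold the decrypted block as two doublewords
	# (s0||s1 and s2||s3); XORing with the previous ciphertext block
	# kept at 16*8(%r15) completes the CBC chaining.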
1544
1545	lg	%r0,0(%r2)
1546	lg	%r1,8(%r2)
1547	xg	%r8,16*8(%r15)
1548	xg	%r10,16*8+8(%r15)
1549	lghi	%r9,16
1550	slgr	%r3,%r9
1551	brc	4,.Lcbc_dec_tail	# if borrow
1552	brc	2,.Lcbc_dec_done	# if zero
1553	stg	%r8,0(%r4)
1554	stg	%r10,8(%r4)
1555	stmg	%r0,%r1,16*8(%r15)
1556
1557	la	%r2,16(%r2)
1558	la	%r4,16(%r4)
1559	j	.Lcbc_dec_loop
1560
1561.Lcbc_dec_done:
1562	stg	%r8,0(%r4)
1563	stg	%r10,8(%r4)
1564.Lcbc_dec_exit:
1565	lmg	%r6,%r14,6*8(%r15)
1566	stmg	%r0,%r1,0(%r6)
1567
1568	br	%r14
1569
1570.align	16
1571.Lcbc_dec_tail:
1572	aghi	%r3,15
1573	stg	%r8,16*8(%r15)
1574	stg	%r10,16*8+8(%r15)
1575	bras	%r9,4f
1576	mvc	0(1,%r4),16*8(%r15)
15774:	ex	%r3,0(%r9)
1578	j	.Lcbc_dec_exit
1579.size	AES_cbc_encrypt,.-AES_cbc_encrypt
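# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
# 		 size_t blocks, const AES_KEY *key,
# 		 const unsigned char *ivec) {
# (prototype as used for OpenSSL's ctr128_f callback; after the register
# swap below %r3 holds the number of 16-byte blocks, not a byte count)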
1580.globl	AES_ctr32_encrypt
1581.type	AES_ctr32_encrypt,@function
1582.align	16
1583AES_ctr32_encrypt:
	xgr	%r3,%r4		# flip %r3 and %r4, out and len
1585	xgr	%r4,%r3
1586	xgr	%r3,%r4
1587	llgfr	%r3,%r3	# safe in ctr32 subroutine even in 64-bit case
1588	l	%r0,240(%r5)
1589	lhi	%r1,16
1590	clr	%r0,%r1
1591	jl	.Lctr32_software
1592
1593	stg	%r10,10*8(%r15)
1594	stg	%r11,11*8(%r15)
1595
1596	clr	%r3,%r1		# does work even in 64-bit mode
1597	jle	.Lctr32_nokma		# kma is slower for <= 16 blocks
1598
1599	larl	%r1,OPENSSL_s390xcap_P
1600	lr	%r10,%r0
1601	llihh	%r11,0x8000
1602	srlg	%r11,%r11,0(%r10)
1603	ng	%r11,S390X_KMA(%r1)		# check kma capability vector
1604	jz	.Lctr32_nokma
1605
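	# carve out a scratch area below the current frame and build the
	# KMA-GCM parameter block there; per the CPACF definition this
	# appears to place the 32-bit counter at +12, J0 (initial counter
	# block) at +64 and the key at +80, which is what the stores below
	# fill in.  No AAD is supplied and HS/LAAD are preset, so kma acts
	# as a pure CTR engine.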
1606	lghi	%r1,-160-112
1607	lgr	%r11,%r15
1608	la	%r15,0(%r1,%r15)			# prepare parameter block
1609
1610	lhi	%r1,0x0600
1611	sllg	%r3,%r3,4
1612	or	%r0,%r1				# set HS and LAAD flags
1613
1614	stg	%r11,0(%r15)			# backchain
1615	la	%r1,160(%r15)
1616
1617	lmg	%r10,%r11,0(%r5)			# copy key
1618	stg	%r10,160+80(%r15)
1619	stg	%r11,160+88(%r15)
1620	lmg	%r10,%r11,16(%r5)
1621	stg	%r10,160+96(%r15)
1622	stg	%r11,160+104(%r15)
1623
1624	lmg	%r10,%r11,0(%r6)			# copy iv
1625	stg	%r10,160+64(%r15)
1626	ahi	%r11,-1				# kma requires counter-1
1627	stg	%r11,160+72(%r15)
1628	st	%r11,160+12(%r15)		# copy counter
1629
1630	lghi	%r10,0				# no AAD
1631	lghi	%r11,0
1632
1633	.long	0xb929a042	# kma %r4,%r10,%r2
1634	brc	1,.-4		# pay attention to "partial completion"
1635
1636	stg	%r0,160+80(%r15)		# wipe key
1637	stg	%r0,160+88(%r15)
1638	stg	%r0,160+96(%r15)
1639	stg	%r0,160+104(%r15)
1640	la	%r15,160+112(%r15)
1641
1642	lmg	%r10,%r11,10*8(%r15)
1643	br	%r14
1644
1645.align	16
1646.Lctr32_nokma:
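	# km-based fallback: fill a stack buffer with successive counter
	# blocks (32-bit increment of the low word), ECB-encrypt the whole
	# buffer with a single km call, then XOR it into the input to
	# produce the output; the buffer is overwritten before returning.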
1647	stmg	%r6,%r9,6*8(%r15)
1648
1649	slgr	%r4,%r2
1650	la	%r1,0(%r5)	# %r1 is permanent copy of %r5
1651	lg	%r5,0(%r6)	# load ivec
1652	lg	%r6,8(%r6)
1653
1654	# prepare and allocate stack frame at the top of 4K page
1655	# with 1K reserved for eventual signal handling
	lghi	%r8,-1024-256-16	# guarantee at least a 256-byte buffer
1657	lghi	%r9,-4096
1658	algr	%r8,%r15
1659	lgr	%r7,%r15
1660	ngr	%r8,%r9		# align at page boundary
1661	slgr	%r7,%r8		# total buffer size
1662	lgr	%r10,%r15
	lghi	%r9,1024+16	# sl[g]fi is in the extended-immediate facility, so avoid it
1664	slgr	%r7,%r9		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16
1666
1667	la	%r15,1024(%r8)	# alloca
1668	srlg	%r7,%r7,4	# convert bytes to blocks, minimum 16
1669	stg	%r10,0(%r15)	# back-chain
1670	stg	%r7,8(%r15)
1671
1672	slgr	%r3,%r7
1673	brc	1,.Lctr32_hw_switch	# not zero, no borrow
1674	algr	%r7,%r3	# input is shorter than allocated buffer
1675	lghi	%r3,0
1676	stg	%r7,8(%r15)
1677
1678.Lctr32_hw_switch:
1679.Lctr32_km_loop:
1680	la	%r10,16(%r15)
1681	lgr	%r11,%r7
1682.Lctr32_km_prepare:
1683	stg	%r5,0(%r10)
1684	stg	%r6,8(%r10)
1685	la	%r10,16(%r10)
1686	ahi	%r6,1		# 32-bit increment, preserves upper half
1687	brct	%r11,.Lctr32_km_prepare
1688
1689	la	%r8,16(%r15)	# inp
1690	sllg	%r9,%r7,4	# len
1691	la	%r10,16(%r15)	# out
1692	.long	0xb92e00a8	# km %r10,%r8
1693	brc	1,.-4		# pay attention to "partial completion"
1694
1695	la	%r10,16(%r15)
1696	lgr	%r11,%r7
1697	slgr	%r10,%r2
1698.Lctr32_km_xor:
1699	lg	%r8,0(%r2)
1700	lg	%r9,8(%r2)
1701	xg	%r8,0(%r10,%r2)
1702	xg	%r9,8(%r10,%r2)
1703	stg	%r8,0(%r4,%r2)
1704	stg	%r9,8(%r4,%r2)
1705	la	%r2,16(%r2)
1706	brct	%r11,.Lctr32_km_xor
1707
1708	slgr	%r3,%r7
1709	brc	1,.Lctr32_km_loop	# not zero, no borrow
1710	algr	%r7,%r3
1711	lghi	%r3,0
1712	brc	4+1,.Lctr32_km_loop	# not zero
1713
1714	lg	%r8,0(%r15)
1715	lg	%r9,8(%r15)
1716	la	%r10,16(%r15)
1717.Lctr32_km_zap:
1718	stg	%r8,0(%r10)
1719	stg	%r8,8(%r10)
1720	la	%r10,16(%r10)
1721	brct	%r9,.Lctr32_km_zap
1722
1723	la	%r15,0(%r8)
1724	lmg	%r6,%r11,6*8(%r15)
1725	br	%r14
1726.align	16
1727.Lctr32_software:
1728	stmg	%r5,%r14,5*8(%r15)
1729	slgr	%r2,%r4
1730	larl	%r12,AES_Te
1731	llgf	%r1,12(%r6)
1732
1733.Lctr32_loop:
1734	stmg	%r2,%r4,2*8(%r15)
1735	llgf	%r8,0(%r6)
1736	llgf	%r9,4(%r6)
1737	llgf	%r10,8(%r6)
1738	lgr	%r11,%r1
1739	st	%r1,16*8(%r15)
1740	lgr	%r4,%r5
1741
1742	bras	%r14,_s390x_AES_encrypt
1743
1744	lmg	%r2,%r6,2*8(%r15)
1745	llgf	%r1,16*8(%r15)
1746	x	%r8,0(%r2,%r4)
1747	x	%r9,4(%r2,%r4)
1748	x	%r10,8(%r2,%r4)
1749	x	%r11,12(%r2,%r4)
1750	stm	%r8,%r11,0(%r4)
1751
1752	la	%r4,16(%r4)
1753	ahi	%r1,1		# 32-bit increment
1754	brct	%r3,.Lctr32_loop
1755
1756	lmg	%r6,%r14,6*8(%r15)
1757	br	%r14
1758.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
1759.type	_s390x_xts_km,@function
1760.align	16
1761_s390x_xts_km:
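	# helper for the KM-driven XTS paths: if KM-XTS-AES is available
	# (base function code + 32, i.e. 50/52), a parameter block holding
	# the key followed by the current tweak at 144(%r15) is set up and
	# one km call processes the whole bulk; otherwise .Lxts_km_vanilla
	# encrypts a buffer of tweak values with plain km and XORs them
	# around the data.  On return %r3 = len%16 and the condition code
	# tells the caller whether a partial tail block remains.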
1762	llgfr	%r8,%r0			# put aside the function code
1763	lghi	%r9,0x7f
1764	nr	%r9,%r0
1765	larl	%r1,OPENSSL_s390xcap_P
1766	llihh	%r0,0x8000
1767	srlg	%r0,%r0,32(%r9)		# check for 32+function code
1768	ng	%r0,S390X_KM(%r1)	# check km capability vector
1769	lgr	%r0,%r8			# restore the function code
1770	la	%r1,0(%r5)		# restore %r5
1771	jz	.Lxts_km_vanilla
1772
1773	lmg	%r6,%r7,144(%r15)	# put aside the tweak value
1774	algr	%r4,%r2
1775
1776	oill	%r0,32			# switch to xts function code
1777	aghi	%r9,-18			#
1778	sllg	%r9,%r9,3		# (function code - 18)*8, 0 or 16
1779	la	%r1,144-16(%r15)
1780	slgr	%r1,%r9			# parameter block position
1781	lmg	%r8,%r11,0(%r5)	# load 256 bits of key material,
1782	stmg	%r8,%r11,0(%r1)		# and copy it to parameter block.
1783					# yes, it contains junk and overlaps
1784					# with the tweak in 128-bit case.
1785					# it's done to avoid conditional
1786					# branch.
1787	stmg	%r6,%r7,144(%r15)	# "re-seat" the tweak value
1788
1789	.long	0xb92e0042		# km %r4,%r2
1790	brc	1,.-4			# pay attention to "partial completion"
1791
1792	lrvg	%r8,144+0(%r15)	# load the last tweak
1793	lrvg	%r9,144+8(%r15)
1794	stmg	%r0,%r3,144-32(%r15)	# wipe copy of the key
1795
1796	nill	%r0,0xffdf		# switch back to original function code
1797	la	%r1,0(%r5)		# restore pointer to %r5
1798	slgr	%r4,%r2
1799
1800	llgc	%r3,2*8-1(%r15)
1801	nill	%r3,0x0f		# %r3%=16
1802	br	%r14
1803
1804.align	16
1805.Lxts_km_vanilla:
1806	# prepare and allocate stack frame at the top of 4K page
1807	# with 1K reserved for eventual signal handling
	lghi	%r8,-1024-256-16	# guarantee at least a 256-byte buffer
1809	lghi	%r9,-4096
1810	algr	%r8,%r15
1811	lgr	%r7,%r15
1812	ngr	%r8,%r9		# align at page boundary
1813	slgr	%r7,%r8		# total buffer size
1814	lgr	%r10,%r15
	lghi	%r9,1024+16	# sl[g]fi is in the extended-immediate facility, so avoid it
1816	slgr	%r7,%r9		# deduct reservation to get usable buffer size
	# buffer size is at least 256 and at most 3072+256-16
1818
1819	la	%r15,1024(%r8)	# alloca
1820	nill	%r7,0xfff0	# round to 16*n
1821	stg	%r10,0(%r15)	# back-chain
1822	nill	%r3,0xfff0	# redundant
1823	stg	%r7,8(%r15)
1824
1825	slgr	%r3,%r7
1826	brc	1,.Lxts_km_go	# not zero, no borrow
1827	algr	%r7,%r3	# input is shorter than allocated buffer
1828	lghi	%r3,0
1829	stg	%r7,8(%r15)
1830
1831.Lxts_km_go:
1832	lrvg	%r8,144+0(%r10)	# load the tweak value in little-endian
1833	lrvg	%r9,144+8(%r10)
1834
1835	la	%r10,16(%r15)		# vector of ascending tweak values
1836	slgr	%r10,%r2
1837	srlg	%r11,%r7,4
1838	j	.Lxts_km_start
1839
1840.Lxts_km_loop:
1841	la	%r10,16(%r15)
1842	slgr	%r10,%r2
1843	srlg	%r11,%r7,4
1844.Lxts_km_prepare:
1845	lghi	%r5,0x87
1846	srag	%r6,%r9,63		# broadcast upper bit
1847	ngr	%r5,%r6			# rem
1848	algr	%r8,%r8
1849	alcgr	%r9,%r9
1850	xgr	%r8,%r5
1851.Lxts_km_start:
1852	lrvgr	%r5,%r8			# flip byte order
1853	lrvgr	%r6,%r9
1854	stg	%r5,0(%r10,%r2)
1855	stg	%r6,8(%r10,%r2)
1856	xg	%r5,0(%r2)
1857	xg	%r6,8(%r2)
1858	stg	%r5,0(%r4,%r2)
1859	stg	%r6,8(%r4,%r2)
1860	la	%r2,16(%r2)
1861	brct	%r11,.Lxts_km_prepare
1862
1863	slgr	%r2,%r7		# rewind %r2
1864	la	%r10,0(%r4,%r2)
1865	lgr	%r11,%r7
1866	.long	0xb92e00aa		# km %r10,%r10
1867	brc	1,.-4			# pay attention to "partial completion"
1868
1869	la	%r10,16(%r15)
1870	slgr	%r10,%r2
1871	srlg	%r11,%r7,4
1872.Lxts_km_xor:
1873	lg	%r5,0(%r4,%r2)
1874	lg	%r6,8(%r4,%r2)
1875	xg	%r5,0(%r10,%r2)
1876	xg	%r6,8(%r10,%r2)
1877	stg	%r5,0(%r4,%r2)
1878	stg	%r6,8(%r4,%r2)
1879	la	%r2,16(%r2)
1880	brct	%r11,.Lxts_km_xor
1881
1882	slgr	%r3,%r7
1883	brc	1,.Lxts_km_loop		# not zero, no borrow
1884	algr	%r7,%r3
1885	lghi	%r3,0
1886	brc	4+1,.Lxts_km_loop	# not zero
1887
1888	lg	%r5,0(%r15)		# back-chain
1889	llgf	%r7,12(%r15)	# bytes used
1890	la	%r6,16(%r15)
1891	srlg	%r7,%r7,4
1892.Lxts_km_zap:
1893	stg	%r5,0(%r6)
1894	stg	%r5,8(%r6)
1895	la	%r6,16(%r6)
1896	brct	%r7,.Lxts_km_zap
1897
1898	la	%r15,0(%r5)
1899	llgc	%r3,2*8-1(%r5)
1900	nill	%r3,0x0f		# %r3%=16
1901	bzr	%r14
1902
1903	# generate one more tweak...
1904	lghi	%r5,0x87
1905	srag	%r6,%r9,63		# broadcast upper bit
1906	ngr	%r5,%r6			# rem
1907	algr	%r8,%r8
1908	alcgr	%r9,%r9
1909	xgr	%r8,%r5
1910
1911	ltr	%r3,%r3		# clear zero flag
1912	br	%r14
1913.size	_s390x_xts_km,.-_s390x_xts_km
1914
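# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
# 		 size_t len, const AES_KEY *key1, const AES_KEY *key2,
# 		 const unsigned char iv[16]) {
# (argument layout inferred from the register usage below: %r6/key2
# encrypts the iv into the initial tweak, %r5/key1 processes the data,
# and the iv pointer arrives as the sixth argument at 160(%r15))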
1915.globl	AES_xts_encrypt
1916.type	AES_xts_encrypt,@function
1917.align	16
1918AES_xts_encrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, out and len
1920	xgr	%r4,%r3
1921	xgr	%r3,%r4
1922	stg	%r3,1*8(%r15)	# save copy of %r3
	srag	%r3,%r3,4		# formally wrong, because it
					# sign-extends, but who can afford
					# asking to process more than
					# 2^63-1 bytes? used here because
					# it sets the condition code...
1928	bcr	8,%r14			# abort if zero (i.e. less than 16)
1929	llgf	%r0,240(%r6)
1930	lhi	%r1,16
1931	clr	%r0,%r1
1932	jl	.Lxts_enc_software
1933
1934	stg	%r14,5*8(%r15)
1935	stmg	%r6,%r11,6*8(%r15)
1936
1937	sllg	%r3,%r3,4		# %r3&=~15
1938	slgr	%r4,%r2
1939
1940	# generate the tweak value
1941	lg	%r11,160(%r15)	# pointer to iv
1942	la	%r10,144(%r15)
1943	lmg	%r8,%r9,0(%r11)
1944	lghi	%r11,16
1945	stmg	%r8,%r9,0(%r10)
1946	la	%r1,0(%r6)		# %r6 is not needed anymore
1947	.long	0xb92e00aa		# km %r10,%r10, generate the tweak
1948	brc	1,.-4			# can this happen?
1949
1950	l	%r0,240(%r5)
1951	la	%r1,0(%r5)		# %r5 is not needed anymore
1952	bras	%r14,_s390x_xts_km
1953	jz	.Lxts_enc_km_done
1954
1955	aghi	%r2,-16		# take one step back
1956	la	%r7,0(%r4,%r2)	# put aside real %r4
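	# standard XTS ciphertext stealing: the leading bytes of the last
	# full ciphertext block become the short final block, the remaining
	# plaintext bytes take their place, and that block is then
	# re-encrypted under one more tweak.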
1957.Lxts_enc_km_steal:
1958	llgc	%r5,16(%r2)
1959	llgc	%r6,0(%r4,%r2)
1960	stc	%r5,0(%r4,%r2)
1961	stc	%r6,16(%r4,%r2)
1962	la	%r2,1(%r2)
1963	brct	%r3,.Lxts_enc_km_steal
1964
1965	la	%r10,0(%r7)
1966	lghi	%r11,16
1967	lrvgr	%r5,%r8			# flip byte order
1968	lrvgr	%r6,%r9
1969	xg	%r5,0(%r10)
1970	xg	%r6,8(%r10)
1971	stg	%r5,0(%r10)
1972	stg	%r6,8(%r10)
1973	.long	0xb92e00aa		# km %r10,%r10
1974	brc	1,.-4			# can this happen?
1975	lrvgr	%r5,%r8			# flip byte order
1976	lrvgr	%r6,%r9
1977	xg	%r5,0(%r7)
1978	xg	%r6,8(%r7)
1979	stg	%r5,0(%r7)
1980	stg	%r6,8(%r7)
1981
1982.Lxts_enc_km_done:
1983	stg	%r15,144+0(%r15)	# wipe tweak
1984	stg	%r15,144+8(%r15)
1985	lg	%r14,5*8(%r15)
1986	lmg	%r6,%r11,6*8(%r15)
1987	br	%r14
1988.align	16
1989.Lxts_enc_software:
1990	stmg	%r6,%r14,6*8(%r15)
1991
1992	slgr	%r4,%r2
1993
1994	lg	%r11,160(%r15)	# ivp
1995	llgf	%r8,0(%r11)		# load iv
1996	llgf	%r9,4(%r11)
1997	llgf	%r10,8(%r11)
1998	llgf	%r11,12(%r11)
1999	stmg	%r2,%r5,2*8(%r15)
2000	la	%r4,0(%r6)
2001	larl	%r12,AES_Te
2002	bras	%r14,_s390x_AES_encrypt	# generate the tweak
2003	lmg	%r2,%r5,2*8(%r15)
2004	stm	%r8,%r11,144(%r15)	# save the tweak
2005	j	.Lxts_enc_enter
2006
2007.align	16
2008.Lxts_enc_loop:
2009	lrvg	%r9,144+0(%r15)	# load the tweak in little-endian
2010	lrvg	%r11,144+8(%r15)
2011	lghi	%r1,0x87
2012	srag	%r0,%r11,63		# broadcast upper bit
2013	ngr	%r1,%r0			# rem
2014	algr	%r9,%r9
2015	alcgr	%r11,%r11
2016	xgr	%r9,%r1
2017	lrvgr	%r9,%r9			# flip byte order
2018	lrvgr	%r11,%r11
2019	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
2020	stg	%r9,144+0(%r15)	# save the tweak
2021	llgfr	%r9,%r9
2022	srlg	%r10,%r11,32
2023	stg	%r11,144+8(%r15)
2024	llgfr	%r11,%r11
2025	la	%r2,16(%r2)		# %r2+=16
2026.Lxts_enc_enter:
2027	x	%r8,0(%r2)		# ^=*(%r2)
2028	x	%r9,4(%r2)
2029	x	%r10,8(%r2)
2030	x	%r11,12(%r2)
2031	stmg	%r2,%r3,2*8(%r15)	# only two registers are changing
2032	la	%r4,0(%r5)
2033	bras	%r14,_s390x_AES_encrypt
2034	lmg	%r2,%r5,2*8(%r15)
2035	x	%r8,144+0(%r15)	# ^=tweak
2036	x	%r9,144+4(%r15)
2037	x	%r10,144+8(%r15)
2038	x	%r11,144+12(%r15)
2039	st	%r8,0(%r4,%r2)
2040	st	%r9,4(%r4,%r2)
2041	st	%r10,8(%r4,%r2)
2042	st	%r11,12(%r4,%r2)
2043	brctg	%r3,.Lxts_enc_loop
2044
2045	llgc	%r3,15(%r15)
2046	nill	%r3,0x0f		# %r3%16
2047	jz	.Lxts_enc_done
2048
2049	la	%r7,0(%r2,%r4)	# put aside real %r4
2050.Lxts_enc_steal:
2051	llgc	%r0,16(%r2)
2052	llgc	%r1,0(%r4,%r2)
2053	stc	%r0,0(%r4,%r2)
2054	stc	%r1,16(%r4,%r2)
2055	la	%r2,1(%r2)
2056	brct	%r3,.Lxts_enc_steal
2057	la	%r4,0(%r7)		# restore real %r4
2058
2059	# generate last tweak...
2060	lrvg	%r9,144+0(%r15)	# load the tweak in little-endian
2061	lrvg	%r11,144+8(%r15)
2062	lghi	%r1,0x87
2063	srag	%r0,%r11,63		# broadcast upper bit
2064	ngr	%r1,%r0			# rem
2065	algr	%r9,%r9
2066	alcgr	%r11,%r11
2067	xgr	%r9,%r1
2068	lrvgr	%r9,%r9			# flip byte order
2069	lrvgr	%r11,%r11
2070	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
2071	stg	%r9,144+0(%r15)	# save the tweak
2072	llgfr	%r9,%r9
2073	srlg	%r10,%r11,32
2074	stg	%r11,144+8(%r15)
2075	llgfr	%r11,%r11
2076
	x	%r8,0(%r4)		# ^=*(inp)|stolen cipher-text
2078	x	%r9,4(%r4)
2079	x	%r10,8(%r4)
2080	x	%r11,12(%r4)
2081	stg	%r4,4*8(%r15)
2082	la	%r4,0(%r5)
2083	bras	%r14,_s390x_AES_encrypt
2084	lg	%r4,4*8(%r15)
2085	x	%r8,144(%r15)	# ^=tweak
2086	x	%r9,148(%r15)
2087	x	%r10,152(%r15)
2088	x	%r11,156(%r15)
2089	st	%r8,0(%r4)
2090	st	%r9,4(%r4)
2091	st	%r10,8(%r4)
2092	st	%r11,12(%r4)
2093
2094.Lxts_enc_done:
2095	stg	%r15,144+0(%r15)	# wipe tweak
2096	stg	%r15,144+8(%r15)
2097	lmg	%r6,%r14,6*8(%r15)
2098	br	%r14
2099.size	AES_xts_encrypt,.-AES_xts_encrypt
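# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
# 		 size_t len, const AES_KEY *key1, const AES_KEY *key2,
# 		 const unsigned char iv[16]) {
# (same argument layout as AES_xts_encrypt above)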
2100.globl	AES_xts_decrypt
2101.type	AES_xts_decrypt,@function
2102.align	16
2103AES_xts_decrypt:
	xgr	%r3,%r4			# flip %r3 and %r4, out and len
2105	xgr	%r4,%r3
2106	xgr	%r3,%r4
2107	stg	%r3,1*8(%r15)	# save copy of %r3
2108	aghi	%r3,-16
2109	bcr	4,%r14			# abort if less than zero. formally
2110					# wrong, because %r3 is unsigned,
2111					# but who can afford asking to
2112					# process more than 2^63-1 bytes?
2113	tmll	%r3,0x0f
2114	jnz	.Lxts_dec_proceed
2115	aghi	%r3,16
2116.Lxts_dec_proceed:
2117	llgf	%r0,240(%r6)
2118	lhi	%r1,16
2119	clr	%r0,%r1
2120	jl	.Lxts_dec_software
2121
2122	stg	%r14,5*8(%r15)
2123	stmg	%r6,%r11,6*8(%r15)
2124
2125	nill	%r3,0xfff0		# %r3&=~15
2126	slgr	%r4,%r2
2127
2128	# generate the tweak value
2129	lg	%r11,160(%r15)	# pointer to iv
2130	la	%r10,144(%r15)
2131	lmg	%r8,%r9,0(%r11)
2132	lghi	%r11,16
2133	stmg	%r8,%r9,0(%r10)
2134	la	%r1,0(%r6)		# %r6 is not needed past this point
2135	.long	0xb92e00aa		# km %r10,%r10, generate the tweak
2136	brc	1,.-4			# can this happen?
2137
2138	l	%r0,240(%r5)
2139	la	%r1,0(%r5)		# %r5 is not needed anymore
2140
2141	ltgr	%r3,%r3
2142	jz	.Lxts_dec_km_short
2143	bras	%r14,_s390x_xts_km
2144	jz	.Lxts_dec_km_done
2145
2146	lrvgr	%r10,%r8			# make copy in reverse byte order
2147	lrvgr	%r11,%r9
2148	j	.Lxts_dec_km_2ndtweak
2149
2150.Lxts_dec_km_short:
2151	llgc	%r3,15(%r15)
2152	nill	%r3,0x0f		# %r3%=16
2153	lrvg	%r8,144+0(%r15)	# load the tweak
2154	lrvg	%r9,144+8(%r15)
2155	lrvgr	%r10,%r8			# make copy in reverse byte order
2156	lrvgr	%r11,%r9
2157
2158.Lxts_dec_km_2ndtweak:
2159	lghi	%r5,0x87
2160	srag	%r6,%r9,63		# broadcast upper bit
2161	ngr	%r5,%r6			# rem
2162	algr	%r8,%r8
2163	alcgr	%r9,%r9
2164	xgr	%r8,%r5
2165	lrvgr	%r5,%r8			# flip byte order
2166	lrvgr	%r6,%r9
2167
2168	xg	%r5,0(%r2)
2169	xg	%r6,8(%r2)
2170	stg	%r5,0(%r4,%r2)
2171	stg	%r6,8(%r4,%r2)
2172	la	%r6,0(%r4,%r2)
2173	lghi	%r7,16
2174	.long	0xb92e0066		# km %r6,%r6
2175	brc	1,.-4			# can this happen?
2176	lrvgr	%r5,%r8
2177	lrvgr	%r6,%r9
2178	xg	%r5,0(%r4,%r2)
2179	xg	%r6,8(%r4,%r2)
2180	stg	%r5,0(%r4,%r2)
2181	stg	%r6,8(%r4,%r2)
2182
2183	la	%r7,0(%r4,%r2)	# put aside real %r4
2184.Lxts_dec_km_steal:
2185	llgc	%r5,16(%r2)
2186	llgc	%r6,0(%r4,%r2)
2187	stc	%r5,0(%r4,%r2)
2188	stc	%r6,16(%r4,%r2)
2189	la	%r2,1(%r2)
2190	brct	%r3,.Lxts_dec_km_steal
2191
2192	lgr	%r8,%r10
2193	lgr	%r9,%r11
2194	xg	%r8,0(%r7)
2195	xg	%r9,8(%r7)
2196	stg	%r8,0(%r7)
2197	stg	%r9,8(%r7)
2198	la	%r8,0(%r7)
2199	lghi	%r9,16
2200	.long	0xb92e0088		# km %r8,%r8
2201	brc	1,.-4			# can this happen?
2202	xg	%r10,0(%r7)
2203	xg	%r11,8(%r7)
2204	stg	%r10,0(%r7)
2205	stg	%r11,8(%r7)
2206.Lxts_dec_km_done:
2207	stg	%r15,144+0(%r15)	# wipe tweak
2208	stg	%r15,144+8(%r15)
2209	lg	%r14,5*8(%r15)
2210	lmg	%r6,%r11,6*8(%r15)
2211	br	%r14
2212.align	16
2213.Lxts_dec_software:
2214	stmg	%r6,%r14,6*8(%r15)
2215
2216	srlg	%r3,%r3,4
2217	slgr	%r4,%r2
2218
2219	lg	%r11,160(%r15)	# ivp
2220	llgf	%r8,0(%r11)		# load iv
2221	llgf	%r9,4(%r11)
2222	llgf	%r10,8(%r11)
2223	llgf	%r11,12(%r11)
2224	stmg	%r2,%r5,2*8(%r15)
2225	la	%r4,0(%r6)
2226	larl	%r12,AES_Te
2227	bras	%r14,_s390x_AES_encrypt	# generate the tweak
2228	lmg	%r2,%r5,2*8(%r15)
2229	larl	%r12,AES_Td
2230	ltgr	%r3,%r3
2231	stm	%r8,%r11,144(%r15)	# save the tweak
2232	jz	.Lxts_dec_short
2233	j	.Lxts_dec_enter
2234
2235.align	16
2236.Lxts_dec_loop:
2237	lrvg	%r9,144+0(%r15)	# load the tweak in little-endian
2238	lrvg	%r11,144+8(%r15)
2239	lghi	%r1,0x87
2240	srag	%r0,%r11,63		# broadcast upper bit
2241	ngr	%r1,%r0			# rem
2242	algr	%r9,%r9
2243	alcgr	%r11,%r11
2244	xgr	%r9,%r1
2245	lrvgr	%r9,%r9			# flip byte order
2246	lrvgr	%r11,%r11
2247	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
2248	stg	%r9,144+0(%r15)	# save the tweak
2249	llgfr	%r9,%r9
2250	srlg	%r10,%r11,32
2251	stg	%r11,144+8(%r15)
2252	llgfr	%r11,%r11
2253.Lxts_dec_enter:
2254	x	%r8,0(%r2)		# tweak^=*(inp)
2255	x	%r9,4(%r2)
2256	x	%r10,8(%r2)
2257	x	%r11,12(%r2)
2258	stmg	%r2,%r3,2*8(%r15)	# only two registers are changing
2259	la	%r4,0(%r5)
2260	bras	%r14,_s390x_AES_decrypt
2261	lmg	%r2,%r5,2*8(%r15)
2262	x	%r8,144+0(%r15)	# ^=tweak
2263	x	%r9,144+4(%r15)
2264	x	%r10,144+8(%r15)
2265	x	%r11,144+12(%r15)
2266	st	%r8,0(%r4,%r2)
2267	st	%r9,4(%r4,%r2)
2268	st	%r10,8(%r4,%r2)
2269	st	%r11,12(%r4,%r2)
2270	la	%r2,16(%r2)
2271	brctg	%r3,.Lxts_dec_loop
2272
2273	llgc	%r3,15(%r15)
2274	nill	%r3,0x0f		# %r3%16
2275	jz	.Lxts_dec_done
2276
2277	# generate pair of tweaks...
2278	lrvg	%r9,144+0(%r15)	# load the tweak in little-endian
2279	lrvg	%r11,144+8(%r15)
2280	lghi	%r1,0x87
2281	srag	%r0,%r11,63		# broadcast upper bit
2282	ngr	%r1,%r0			# rem
2283	algr	%r9,%r9
2284	alcgr	%r11,%r11
2285	xgr	%r9,%r1
2286	lrvgr	%r6,%r9			# flip byte order
2287	lrvgr	%r7,%r11
2288	stmg	%r6,%r7,144(%r15)	# save the 1st tweak
2289	j	.Lxts_dec_2ndtweak
2290
2291.align	16
2292.Lxts_dec_short:
2293	llgc	%r3,15(%r15)
2294	nill	%r3,0x0f		# %r3%16
2295	lrvg	%r9,144+0(%r15)	# load the tweak in little-endian
2296	lrvg	%r11,144+8(%r15)
2297.Lxts_dec_2ndtweak:
2298	lghi	%r1,0x87
2299	srag	%r0,%r11,63		# broadcast upper bit
2300	ngr	%r1,%r0			# rem
2301	algr	%r9,%r9
2302	alcgr	%r11,%r11
2303	xgr	%r9,%r1
2304	lrvgr	%r9,%r9			# flip byte order
2305	lrvgr	%r11,%r11
2306	srlg	%r8,%r9,32		# smash the tweak to 4x32-bits
2307	stg	%r9,144-16+0(%r15)	# save the 2nd tweak
2308	llgfr	%r9,%r9
2309	srlg	%r10,%r11,32
2310	stg	%r11,144-16+8(%r15)
2311	llgfr	%r11,%r11
2312
2313	x	%r8,0(%r2)		# tweak_the_2nd^=*(inp)
2314	x	%r9,4(%r2)
2315	x	%r10,8(%r2)
2316	x	%r11,12(%r2)
2317	stmg	%r2,%r3,2*8(%r15)
2318	la	%r4,0(%r5)
2319	bras	%r14,_s390x_AES_decrypt
2320	lmg	%r2,%r5,2*8(%r15)
2321	x	%r8,144-16+0(%r15)	# ^=tweak_the_2nd
2322	x	%r9,144-16+4(%r15)
2323	x	%r10,144-16+8(%r15)
2324	x	%r11,144-16+12(%r15)
2325	st	%r8,0(%r4,%r2)
2326	st	%r9,4(%r4,%r2)
2327	st	%r10,8(%r4,%r2)
2328	st	%r11,12(%r4,%r2)
2329
2330	la	%r7,0(%r4,%r2)	# put aside real %r4
2331.Lxts_dec_steal:
2332	llgc	%r0,16(%r2)
2333	llgc	%r1,0(%r4,%r2)
2334	stc	%r0,0(%r4,%r2)
2335	stc	%r1,16(%r4,%r2)
2336	la	%r2,1(%r2)
2337	brct	%r3,.Lxts_dec_steal
2338	la	%r4,0(%r7)		# restore real %r4
2339
2340	lm	%r8,%r11,144(%r15)	# load the 1st tweak
2341	x	%r8,0(%r4)		# tweak^=*(inp)|stolen cipher-text
2342	x	%r9,4(%r4)
2343	x	%r10,8(%r4)
2344	x	%r11,12(%r4)
2345	stg	%r4,4*8(%r15)
2346	la	%r4,0(%r5)
2347	bras	%r14,_s390x_AES_decrypt
2348	lg	%r4,4*8(%r15)
2349	x	%r8,144+0(%r15)	# ^=tweak
2350	x	%r9,144+4(%r15)
2351	x	%r10,144+8(%r15)
2352	x	%r11,144+12(%r15)
2353	st	%r8,0(%r4)
2354	st	%r9,4(%r4)
2355	st	%r10,8(%r4)
2356	st	%r11,12(%r4)
2357	stg	%r15,144-16+0(%r15)	# wipe 2nd tweak
2358	stg	%r15,144-16+8(%r15)
2359.Lxts_dec_done:
2360	stg	%r15,144+0(%r15)	# wipe tweak
2361	stg	%r15,144+8(%r15)
2362	lmg	%r6,%r14,6*8(%r15)
2363	br	%r14
2364.size	AES_xts_decrypt,.-AES_xts_decrypt
2365.string	"AES for s390x, CRYPTOGAMS by <appro@openssl.org>"