# (source-viewer navigation chrome from the scrape: Home / Line# / Scopes# /
#  Navigate# / Raw / Download — not part of the original source)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

# Under MemorySanitizer the hand-written assembly is unobservable to the
# sanitizer, so force the C fallback by defining OPENSSL_NO_ASM.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

# x86-64 GHASH (GCM universal hash) via PCLMULQDQ/AVX, AT&T syntax.
# OPENSSL_ia32cap_P supplies CPUID feature bits for run-time dispatch.
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#include "ring_core_generated/prefix_symbols_asm.h"
.text
.extern	OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P
# void gcm_init_clmul(Htable, H)
#	rdi = Htable (output: six 16-byte vectors stored at offsets
#	      0,16,32,48,64,80), rsi = H (16 bytes, read once).
# Precomputes powers of the hash key for gcm_gmult/gcm_ghash below
# (presumably H, H^2, H^3, H^4 plus Karatsuba-folded halves — the layout
# the 4x ghash path reads back).  The .byte runs encode SSE instructions
# emitted raw by perlasm: 102,15,58,68 = pclmulqdq (66 0F 3A 44) and
# 102,15,58,15 = palignr (66 0F 3A 0F).
.globl	gcm_init_clmul
.hidden gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.cfi_startproc
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2

# H = H<<1: shift left one bit across the 128-bit lane, then xor in the
# 0x1c2 polynomial when the top bit was set (sign broadcast via
# pshufd $255 / pcmpgtd selects the reduction mask).
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2

	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2

# Square H: three pclmulqdq products (lo, hi, middle — Karatsuba).
	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

# Reduce the 256-bit product modulo the GHASH polynomial
# (shift-by-57/5/1 sequence).
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Store H (offset 0), H^2 (offset 16) and their folded halves (offset 32).
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,32(%rdi)
# Next power: same multiply/reduce pattern as above.
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# One more power; the previous result is parked in xmm5 for the stores.
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Store the two higher powers (offsets 48, 64) and folded halves (80).
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8
	movdqu	%xmm4,80(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_init_clmul,.-gcm_init_clmul
# void gcm_gmult_clmul(Xi, Htable)
#	rdi = Xi (16 bytes, read and written; byte-swapped on load/store
#	      via .Lbswap_mask), rsi = Htable from gcm_init_clmul.
# One GF(2^128) multiply of Xi by the hash key: Karatsuba pclmulqdq
# products followed by the polynomial reduction.  Raw opcodes:
# 102,15,56,0 = pshufb (66 0F 38 00); 102,15,58,68 = pclmulqdq;
# .byte 0xf3,0xc3 = rep ret.
.globl	gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.cfi_startproc
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm4
.byte	102,15,56,0,197
# Multiply: lo/hi/middle carry-less products.
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

# Reduce modulo the GHASH polynomial.
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
# Swap back to big-endian byte order and write the result.
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
# void gcm_ghash_clmul(Xi, Htable, inp, len)
#	rdi = Xi (16 bytes, read+written), rsi = Htable from
#	gcm_init_clmul, rdx = input blocks, rcx = byte length
#	(consumed in 16-byte steps).
# Folds `len` bytes of input into the GHASH accumulator Xi.  If at least
# four blocks remain and CPUID word 4 of OPENSSL_ia32cap_P (masked with
# 71303168 = 0x4400000) indicates a suitable core, a 4-blocks-at-a-time
# pclmulqdq pipeline (.Lmod4_loop) is used; otherwise 2-block
# (.Lmod_loop) and 1-block (.Lodd_tail) paths finish the data.
# Raw opcodes as elsewhere: 102,..,15,58,68 = pclmulqdq,
# 102,..,15,56,0 = pshufb, 102,76,15,110,200 = movq %rax,%xmm9.
.globl	gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.cfi_startproc
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
	movdqu	32(%rsi),%xmm7
.byte	102,65,15,56,0,194

	subq	$0x10,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm6
# Feature probe: decide whether the 4x-aggregated path is profitable.
	leaq	OPENSSL_ia32cap_P(%rip),%rax
	movl	4(%rax),%eax
	cmpq	$0x30,%rcx
	jb	.Lskip4x

	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax
	movdqu	48(%rsi),%xmm14
	movdqu	64(%rsi),%xmm15

# Prime the 4x pipeline with the first four input blocks.
	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218
.byte	102,69,15,56,0,218
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
	xorps	%xmm12,%xmm4

	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
# Main loop: 64 bytes (4 blocks) per iteration, multiply interleaved
# with the reduction of the previous iteration.
.Lmod4_loop:
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

# Drain the pipeline: combine the four outstanding products and reduce.
.Ltail4x:
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:

# Two-blocks-at-a-time path.
	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

# Final two blocks of the 2x path.
.Leven_tail:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

# One final block.
.Lodd_tail:
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
# Byte-swap back and store the updated Xi.
.byte	102,65,15,56,0,194
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_clmul,.-gcm_ghash_clmul
# void gcm_init_avx(Htable, H)
#	rdi = Htable (output; the loop stores two 16-byte powers plus a
#	      folded vector per 48-byte stride, 4 iterations via r10),
#	rsi = H (16 bytes).
# AVX variant of gcm_init_clmul: same H<<1 preparation, then a 4-pass
# loop computing successive key powers with 3-operand vpclmulqdq.
.globl	gcm_init_avx
.hidden gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2

# H = H<<1 with conditional reduction by .L0x1c2_polynomial.
	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2

	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6
	movq	$4,%r10
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
# Store the folded vector for the previous pair of powers, then
# multiply the accumulator by H (Karatsuba + reduction).
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
# Second multiply of the pass; previous power parked in xmm5.
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
# Store the pair of powers and advance the table pointer by 48 bytes.
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_init_avx,.-gcm_init_avx
# void gcm_ghash_avx(Xi, Htable, inp, len)
#	rdi = Xi (16 bytes, read+written), rsi = Htable from gcm_init_avx,
#	rdx = input blocks, rcx = byte length.
# AVX GHASH: processes 128 bytes (8 blocks) per .Loop8x_avx iteration
# using the precomputed key powers at rsi, with deferred reduction via
# the 0x1c2 polynomial (address kept in r10); remaining blocks go one
# at a time through .Lshort_avx / .Ltail_avx.
.globl	gcm_ghash_avx
.hidden gcm_ghash_avx
.type	gcm_ghash_avx,@function
.align	32
gcm_ghash_avx:
.cfi_startproc
	vzeroupper

	vmovdqu	(%rdi),%xmm10
	leaq	.L0x1c2_polynomial(%rip),%r10
	leaq	64(%rsi),%rsi
	vmovdqu	.Lbswap_mask(%rip),%xmm13
	vpshufb	%xmm13,%xmm10,%xmm10
	cmpq	$0x80,%rcx
	jb	.Lshort_avx
	subq	$0x80,%rcx

# Prologue of the 8x path: multiply the first 8 blocks (loaded back to
# front, offsets 112..0) by descending key powers, accumulating the
# lo/hi/middle Karatsuba sums in xmm0/xmm1/xmm2 (+xmm3/xmm4/xmm5).
	vmovdqu	112(%rdx),%xmm14
	vmovdqu	0-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vmovdqu	32-64(%rsi),%xmm7

	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm14,%xmm9,%xmm9
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	80(%rdx),%xmm14
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	48-64(%rsi),%xmm6
	vpxor	%xmm14,%xmm9,%xmm9
	vmovdqu	64(%rdx),%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7

	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	48(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	32(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8

	vmovdqu	16(%rdx),%xmm14
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm4,%xmm1,%xmm1
	vpshufb	%xmm13,%xmm14,%xmm14
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpxor	%xmm5,%xmm2,%xmm2
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	(%rdx),%xmm15
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm1,%xmm4,%xmm4
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2

	leaq	128(%rdx),%rdx
	cmpq	$0x80,%rcx
	jb	.Ltail_avx

	vpxor	%xmm10,%xmm15,%xmm15
	subq	$0x80,%rcx
	jmp	.Loop8x_avx

.align	32
# Steady state: multiply 8 new blocks while reducing the previous
# 256-bit accumulator (xmm10/xmm11/xmm12) via two folds against the
# polynomial constant at (%r10).
.Loop8x_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vmovdqu	112(%rdx),%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpxor	%xmm15,%xmm8,%xmm8
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm10
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm11
	vmovdqu	0-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm12
	vmovdqu	32-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9

	vmovdqu	96(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpxor	%xmm3,%xmm10,%xmm10
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vxorps	%xmm4,%xmm11,%xmm11
	vmovdqu	16-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm5,%xmm12,%xmm12
	vxorps	%xmm15,%xmm8,%xmm8

	vmovdqu	80(%rdx),%xmm14
	vpxor	%xmm10,%xmm12,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpxor	%xmm11,%xmm12,%xmm12
	vpslldq	$8,%xmm12,%xmm9
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vpsrldq	$8,%xmm12,%xmm12
	vpxor	%xmm9,%xmm10,%xmm10
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm14
	vxorps	%xmm12,%xmm11,%xmm11
	vpxor	%xmm1,%xmm4,%xmm4
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	80-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	64(%rdx),%xmm15
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vxorps	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2

	vmovdqu	48(%rdx),%xmm14
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	96-64(%rsi),%xmm6
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	128-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	32(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpxor	%xmm3,%xmm0,%xmm0
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm4,%xmm1,%xmm1
	vpclmulqdq	$0x00,%xmm7,%xmm9,%xmm2
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm5,%xmm2,%xmm2
	vxorps	%xmm12,%xmm10,%xmm10

	vmovdqu	16(%rdx),%xmm14
	vpalignr	$8,%xmm10,%xmm10,%xmm12
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm3
	vpshufb	%xmm13,%xmm14,%xmm14
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm4
	vmovdqu	144-64(%rsi),%xmm6
	vpclmulqdq	$0x10,(%r10),%xmm10,%xmm10
	vxorps	%xmm11,%xmm12,%xmm12
	vpunpckhqdq	%xmm14,%xmm14,%xmm9
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm7,%xmm8,%xmm5
	vmovdqu	176-64(%rsi),%xmm7
	vpxor	%xmm14,%xmm9,%xmm9
	vpxor	%xmm2,%xmm5,%xmm5

	vmovdqu	(%rdx),%xmm15
	vpclmulqdq	$0x00,%xmm6,%xmm14,%xmm0
	vpshufb	%xmm13,%xmm15,%xmm15
	vpclmulqdq	$0x11,%xmm6,%xmm14,%xmm1
	vmovdqu	160-64(%rsi),%xmm6
	vpxor	%xmm12,%xmm15,%xmm15
	vpclmulqdq	$0x10,%xmm7,%xmm9,%xmm2
	vpxor	%xmm10,%xmm15,%xmm15

	leaq	128(%rdx),%rdx
	subq	$0x80,%rcx
	jnc	.Loop8x_avx

	addq	$0x80,%rcx
	jmp	.Ltail_no_xor_avx

.align	32
# Fewer than 8 blocks remain: process one block per step, accumulating
# into xmm0/xmm1/xmm2 with ascending key powers.
.Lshort_avx:
	vmovdqu	-16(%rdx,%rcx,1),%xmm14
	leaq	(%rdx,%rcx,1),%rdx
	vmovdqu	0-64(%rsi),%xmm6
	vmovdqu	32-64(%rsi),%xmm7
	vpshufb	%xmm13,%xmm14,%xmm15

	vmovdqa	%xmm0,%xmm3
	vmovdqa	%xmm1,%xmm4
	vmovdqa	%xmm2,%xmm5
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-32(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	16-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-48(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	48-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	80-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-64(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	64-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-80(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	96-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovdqu	128-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-96(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	112-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vpsrldq	$8,%xmm7,%xmm7
	subq	$0x10,%rcx
	jz	.Ltail_avx

	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	-112(%rdx),%xmm14
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vmovdqu	144-64(%rsi),%xmm6
	vpshufb	%xmm13,%xmm14,%xmm15
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2
	vmovq	184-64(%rsi),%xmm7
	subq	$0x10,%rcx
	jmp	.Ltail_avx

.align	32
# Final combine: fold middle terms, two reduction passes against the
# polynomial constant, then either loop for remaining input or finish.
.Ltail_avx:
	vpxor	%xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
	vpunpckhqdq	%xmm15,%xmm15,%xmm8
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x00,%xmm6,%xmm15,%xmm0
	vpxor	%xmm15,%xmm8,%xmm8
	vpxor	%xmm1,%xmm4,%xmm4
	vpclmulqdq	$0x11,%xmm6,%xmm15,%xmm1
	vpxor	%xmm2,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm7,%xmm8,%xmm2

	vmovdqu	(%r10),%xmm12

	vpxor	%xmm0,%xmm3,%xmm10
	vpxor	%xmm1,%xmm4,%xmm11
	vpxor	%xmm2,%xmm5,%xmm5

	vpxor	%xmm10,%xmm5,%xmm5
	vpxor	%xmm11,%xmm5,%xmm5
	vpslldq	$8,%xmm5,%xmm9
	vpsrldq	$8,%xmm5,%xmm5
	vpxor	%xmm9,%xmm10,%xmm10
	vpxor	%xmm5,%xmm11,%xmm11

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	vpclmulqdq	$0x10,%xmm12,%xmm10,%xmm9
	vpalignr	$8,%xmm10,%xmm10,%xmm10
	vpxor	%xmm11,%xmm10,%xmm10
	vpxor	%xmm9,%xmm10,%xmm10

	cmpq	$0,%rcx
	jne	.Lshort_avx

	vpshufb	%xmm13,%xmm10,%xmm10
	vmovdqu	%xmm10,(%rdi)
	vzeroupper
	.byte	0xf3,0xc3
.cfi_endproc
.size	gcm_ghash_avx,.-gcm_ghash_avx
# Constant pool (read-only data referenced RIP-relative above).
.align	64
# Byte-reversal shuffle mask for pshufb/vpshufb (big<->little endian).
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# GHASH reduction polynomial constant (0xc2... in the top byte).
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
# Mask of low 3 bits per 64-bit lane, used by the .Lmod4_loop reduction.
.L7_mask:
.long	7,0,7,0
.align	64

# ASCII: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
#endif
# Mark the stack non-executable for the GNU linker.
.section	.note.GNU-stack,"",@progbits