• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1	.text
2	.file	"matmul.c"
# ---------------------------------------------------------------------
# init_array -- fill the global arrays A and B with identical values.
#
# Syntax/ABI: AT&T (GAS) syntax, x86-64 SysV calling convention.
# In:    nothing (no argument registers are read)
# Out:   nothing meaningful is left in a return register
# Clobb: rax, rcx, rdx, rsi, rdi, r8, r9, xmm0, xmm1, flags
#
# Element written at row i, column j of both A and B:
#     (float)( (double)(((i * j) & 1023) + 1) * 0.5 )
# The inner loop is unrolled by two: each pass writes column rdi-1
# (even) and column rdi (odd).
#
# Register roles inside the loops:
#     rcx  = address of the current row of A (advances 6144 bytes/row)
#     rax  = address of the current row of B (advances 6144 bytes/row)
#     r9   = row index i, 0..1535
#     r8d  = 2*i, the growth of i*j when j advances by two columns
#     rdi  = odd column index 1,3,...,1535
#     edx  = i * (rdi-1), i.e. i*j for the even column of the pair
#     xmm0 = the constant 0.5 loaded from .LCPI0_0
# ---------------------------------------------------------------------
3	.section	.rodata.cst8,"aM",@progbits,8
4	.p2align	3               # -- Begin function init_array
5.LCPI0_0:
6	.quad	4602678819172646912     # double 0.5
7	.text
8	.globl	init_array
9	.p2align	4, 0x90
10	.type	init_array,@function
11init_array:                             # @init_array
12	.cfi_startproc
13# %bb.0:                                # %entry
14	pushq	%rbp
15	.cfi_def_cfa_offset 16
16	.cfi_offset %rbp, -16
17	movq	%rsp, %rbp
18	.cfi_def_cfa_register %rbp
19	leaq	B(%rip), %rax	# rax = &B, row 0
20	leaq	A(%rip), %rcx	# rcx = &A, row 0
21	xorl	%r8d, %r8d	# r8d = 2*i = 0
22	movsd	.LCPI0_0(%rip), %xmm0   # xmm0 = mem[0],zero
23	xorl	%r9d, %r9d	# i = 0
24	.p2align	4, 0x90
25.LBB0_1:                                # %polly.loop_header
26                                        # =>This Loop Header: Depth=1
27                                        #     Child Loop BB0_2 Depth 2
28	movl	$1, %edi	# first odd column of the row
29	xorl	%edx, %edx	# i*j = 0 for j = 0
30	.p2align	4, 0x90
31.LBB0_2:                                # %polly.loop_header1
32                                        #   Parent Loop BB0_1 Depth=1
33                                        # =>  This Inner Loop Header: Depth=2
# Even column (rdi-1): edx = i*j is even here, so (edx & 1022) | 1
# equals (edx & 1023) + 1.
34	movl	%edx, %esi
35	andl	$1022, %esi             # imm = 0x3FE
36	orl	$1, %esi
37	xorps	%xmm1, %xmm1	# clear xmm1 before the partial-register convert
38	cvtsi2sdl	%esi, %xmm1
39	mulsd	%xmm0, %xmm1	# * 0.5, in double precision
40	cvtsd2ss	%xmm1, %xmm1	# narrow to float
41	movss	%xmm1, -4(%rcx,%rdi,4)	# A[i][rdi-1]
42	movss	%xmm1, -4(%rax,%rdi,4)	# B[i][rdi-1]
# Odd column (rdi): i*j for this column is edx + i.
43	leal	(%r9,%rdx), %esi
44	andl	$1023, %esi             # imm = 0x3FF
45	addl	$1, %esi
46	xorps	%xmm1, %xmm1
47	cvtsi2sdl	%esi, %xmm1
48	mulsd	%xmm0, %xmm1
49	cvtsd2ss	%xmm1, %xmm1
50	movss	%xmm1, (%rcx,%rdi,4)	# A[i][rdi]
51	movss	%xmm1, (%rax,%rdi,4)	# B[i][rdi]
52	addq	$2, %rdi	# next pair of columns
53	addl	%r8d, %edx	# i*j grows by 2*i
54	cmpq	$1537, %rdi             # imm = 0x601
55	jne	.LBB0_2
56# %bb.3:                                # %polly.loop_exit3
57                                        #   in Loop: Header=BB0_1 Depth=1
58	addq	$1, %r9	# next row
59	addq	$6144, %rax             # imm = 0x1800
60	addq	$6144, %rcx             # imm = 0x1800
61	addl	$2, %r8d	# keep r8d == 2*i
62	cmpq	$1536, %r9              # imm = 0x600
63	jne	.LBB0_1
64# %bb.4:                                # %polly.exiting
65	popq	%rbp
66	.cfi_def_cfa %rsp, 8
67	retq
68.Lfunc_end0:
69	.size	init_array, .Lfunc_end0-init_array
70	.cfi_endproc
71                                        # -- End function
# ---------------------------------------------------------------------
# print_array -- write every element of C to stdout.
#
# Syntax/ABI: AT&T (GAS) syntax, x86-64 SysV calling convention.
# In:    nothing (no argument registers are read)
# Out:   nothing meaningful is left in a return register
# Calls: fprintf(stdout, "%lf ", (double)element) per element,
#        fputc('\n', stdout) when the column index equals
#        80*(column/80) + 79, and once more after each row.
#
# Values that must survive the calls live in callee-saved registers:
#     r13 = address of the current row of C (advances 6144 bytes/row)
#     rbx = column index, 0..1535
#     r12 = 0xCCCCCCCD, reciprocal constant for the divide-by-80
#     r14 = address of the format string .L.str
#     r15d = 80*(rbx/80) + 79, recomputed for every element
# The row counter is kept in the stack slot -48(%rbp) (the slot created
# by "pushq %rax"); it is written before the calls and read back after.
# Six pushes after "pushq %rbp" leave rsp = rbp - 48, so the stack is
# 16-byte aligned at each call.
# ---------------------------------------------------------------------
72	.globl	print_array             # -- Begin function print_array
73	.p2align	4, 0x90
74	.type	print_array,@function
75print_array:                            # @print_array
76	.cfi_startproc
77# %bb.0:                                # %entry
78	pushq	%rbp
79	.cfi_def_cfa_offset 16
80	.cfi_offset %rbp, -16
81	movq	%rsp, %rbp
82	.cfi_def_cfa_register %rbp
83	pushq	%r15
84	pushq	%r14
85	pushq	%r13
86	pushq	%r12
87	pushq	%rbx
88	pushq	%rax	# reserves the 8-byte slot at -48(%rbp)
89	.cfi_offset %rbx, -56
90	.cfi_offset %r12, -48
91	.cfi_offset %r13, -40
92	.cfi_offset %r14, -32
93	.cfi_offset %r15, -24
94	leaq	C(%rip), %r13	# r13 = &C, row 0
95	xorl	%eax, %eax	# row counter = 0
96	movl	$3435973837, %r12d      # imm = 0xCCCCCCCD
97	leaq	.L.str(%rip), %r14
98	.p2align	4, 0x90
99.LBB1_1:                                # %for.cond1.preheader
100                                        # =>This Loop Header: Depth=1
101                                        #     Child Loop BB1_2 Depth 2
102	movq	%rax, -48(%rbp)         # 8-byte Spill
103	movq	stdout(%rip), %rsi
104	xorl	%ebx, %ebx	# column index = 0
105	.p2align	4, 0x90
106.LBB1_2:                                # %for.body3
107                                        #   Parent Loop BB1_1 Depth=1
108                                        # =>  This Inner Loop Header: Depth=2
# r15d = 80*(rbx/80) + 79: (rbx * 0xCCCCCCCD) >> 38 is rbx/80, and
# the lea/shl pair multiplies that quotient by 5 and then by 16.
109	movl	%ebx, %eax
110	imulq	%r12, %rax
111	shrq	$38, %rax
112	leal	(%rax,%rax,4), %r15d
113	shll	$4, %r15d
114	addl	$79, %r15d
115	movss	(%r13,%rbx,4), %xmm0    # xmm0 = mem[0],zero,zero,zero
116	cvtss2sd	%xmm0, %xmm0	# widen to double for the variadic call
117	movb	$1, %al	# one vector register carries a variadic argument
118	movq	%rsi, %rdi	# arg 1: stdout, loaded before entering this pass
119	movq	%r14, %rsi	# arg 2: format string
120	callq	fprintf
121	cmpl	%ebx, %r15d	# last column of an 80-element group?
122	jne	.LBB1_4
123# %bb.3:                                # %if.then
124                                        #   in Loop: Header=BB1_2 Depth=2
125	movq	stdout(%rip), %rsi
126	movl	$10, %edi	# '\n'
127	callq	fputc@PLT
128.LBB1_4:                                # %for.inc
129                                        #   in Loop: Header=BB1_2 Depth=2
130	addq	$1, %rbx
131	movq	stdout(%rip), %rsi	# reloaded for the next fprintf or the row-end fputc
132	cmpq	$1536, %rbx             # imm = 0x600
133	jne	.LBB1_2
134# %bb.5:                                # %for.end
135                                        #   in Loop: Header=BB1_1 Depth=1
136	movl	$10, %edi	# '\n' terminating the row
137	callq	fputc@PLT
138	movq	-48(%rbp), %rax         # 8-byte Reload
139	addq	$1, %rax	# next row
140	addq	$6144, %r13             # imm = 0x1800
141	cmpq	$1536, %rax             # imm = 0x600
142	jne	.LBB1_1
143# %bb.6:                                # %for.end12
144	addq	$8, %rsp	# drop the -48(%rbp) slot
145	popq	%rbx
146	popq	%r12
147	popq	%r13
148	popq	%r14
149	popq	%r15
150	popq	%rbp
151	.cfi_def_cfa %rsp, 8
152	retq
153.Lfunc_end1:
154	.size	print_array, .Lfunc_end1-print_array
155	.cfi_endproc
156                                        # -- End function
# ---------------------------------------------------------------------
# main -- initialise A and B, zero C, then accumulate C += A * B with a
# blocked five-deep loop nest; returns 0 in eax.
#
# Syntax/ABI: AT&T (GAS) syntax, x86-64 SysV calling convention.
# Calls: init_array, memset(C, 0, 9437184).
# Frame: rbp-based; five callee-saved pushes plus "subq $264" put rsp at
#        rbp-304 (16-byte aligned), so the movaps spill slots are aligned.
#
# Loop nest (all rows are 6144 bytes apart):
#   .LBB2_1  row block of 64 rows.  -48(%rbp) = first row of the block,
#            -80(%rbp) = one past its last row, -72(%rbp) = address in A
#            of the block's first row.
#   .LBB2_2  column block of 64 floats.  rdi = block index (shifted left
#            by 6 to get the column offset), r9 = B+192 + 256*block.
#   .LBB2_3  block of 64 along the shared dimension.  rax = block start,
#            r12 = A pointer (advances 256 bytes), r9 = B pointer
#            (advances 393216 bytes = 64 rows).
#   .LBB2_4  one row r14 of C: its 64 floats for the current column block
#            are loaded into 16 vectors (registers plus spill slots).
#   .LBB2_5  64 passes: broadcast the float at (r12 + 4*r13), multiply it
#            with the 64 floats of B addressed through rbx (offsets
#            -192..+48), add into the 16 accumulators; rbx moves down one
#            row of B per pass.
# After .LBB2_5 the 16 accumulators are stored back to the row of C.
#
# Within .LBB2_5 each group of four accumulators and the matching four B
# vectors are run through a 4x4 lane transpose (unpck/shufps) before the
# mulps/addps, and the sums are transposed back afterwards; because the
# multiplier is a broadcast scalar, the update is element-wise.
# ---------------------------------------------------------------------
157	.globl	main                    # -- Begin function main
158	.p2align	4, 0x90
159	.type	main,@function
160main:                                   # @main
161	.cfi_startproc
162# %bb.0:                                # %entry
163	pushq	%rbp
164	.cfi_def_cfa_offset 16
165	.cfi_offset %rbp, -16
166	movq	%rsp, %rbp
167	.cfi_def_cfa_register %rbp
168	pushq	%r15
169	pushq	%r14
170	pushq	%r13
171	pushq	%r12
172	pushq	%rbx
173	subq	$264, %rsp              # imm = 0x108
174	.cfi_offset %rbx, -56
175	.cfi_offset %r12, -48
176	.cfi_offset %r13, -40
177	.cfi_offset %r14, -32
178	.cfi_offset %r15, -24
179	callq	init_array
180	leaq	C(%rip), %rdi
181	xorl	%eax, %eax
182	movq	%rax, -48(%rbp)         # 8-byte Spill
183	xorl	%esi, %esi
184	movl	$9437184, %edx          # imm = 0x900000
185	callq	memset@PLT
186	movl	$64, %eax
187	movq	%rax, -80(%rbp)         # 8-byte Spill
188	leaq	A(%rip), %rax
189	movq	%rax, -72(%rbp)         # 8-byte Spill
190	.p2align	4, 0x90
191.LBB2_1:                                # %polly.loop_header8
192                                        # =>This Loop Header: Depth=1
193                                        #     Child Loop BB2_2 Depth 2
194                                        #       Child Loop BB2_3 Depth 3
195                                        #         Child Loop BB2_4 Depth 4
196                                        #           Child Loop BB2_5 Depth 5
197	leaq	B+192(%rip), %r9
198	xorl	%edi, %edi
199	xorl	%eax, %eax
200	.p2align	4, 0x90
201.LBB2_2:                                # %polly.loop_header14
202                                        #   Parent Loop BB2_1 Depth=1
203                                        # =>  This Loop Header: Depth=2
204                                        #       Child Loop BB2_3 Depth 3
205                                        #         Child Loop BB2_4 Depth 4
206                                        #           Child Loop BB2_5 Depth 5
207	movq	%rax, -168(%rbp)        # 8-byte Spill
208	movq	%rdi, -176(%rbp)        # 8-byte Spill
209	shlq	$6, %rdi
210	leaq	16(%rdi), %rdx
211	leaq	32(%rdi), %rsi
212	leaq	48(%rdi), %rcx
213	movq	-72(%rbp), %r12         # 8-byte Reload
214	movq	%r9, -184(%rbp)         # 8-byte Spill
215	xorl	%eax, %eax
216	.p2align	4, 0x90
217.LBB2_3:                                # %polly.loop_header20
218                                        #   Parent Loop BB2_1 Depth=1
219                                        #     Parent Loop BB2_2 Depth=2
220                                        # =>    This Loop Header: Depth=3
221                                        #         Child Loop BB2_4 Depth 4
222                                        #           Child Loop BB2_5 Depth 5
223	movq	%rax, -192(%rbp)        # 8-byte Spill
224	movq	%r12, -200(%rbp)        # 8-byte Spill
225	movq	-48(%rbp), %r14         # 8-byte Reload
226	.p2align	4, 0x90
227.LBB2_4:                                # %polly.loop_header26
228                                        #   Parent Loop BB2_1 Depth=1
229                                        #     Parent Loop BB2_2 Depth=2
230                                        #       Parent Loop BB2_3 Depth=3
231                                        # =>      This Loop Header: Depth=4
232                                        #           Child Loop BB2_5 Depth 5
# rbx = C + r14*6144 (r14*3 << 11); r8/r15/r10/r11 address the four
# 16-float groups of this row that the block covers.
233	leaq	(%r14,%r14,2), %rbx
234	shlq	$11, %rbx
235	leaq	C(%rip), %rax
236	addq	%rax, %rbx
237	leaq	(%rbx,%rdi,4), %r8
238	leaq	(%rbx,%rdx,4), %r15
239	leaq	(%rbx,%rsi,4), %r10
240	leaq	(%rbx,%rcx,4), %r11
241	movups	(%rbx,%rdi,4), %xmm8
242	movups	16(%rbx,%rdi,4), %xmm0
243	movaps	%xmm0, -144(%rbp)       # 16-byte Spill
244	movups	32(%rbx,%rdi,4), %xmm6
245	movups	48(%rbx,%rdi,4), %xmm1
246	movups	(%rbx,%rdx,4), %xmm15
247	movups	16(%rbx,%rdx,4), %xmm0
248	movaps	%xmm0, -64(%rbp)        # 16-byte Spill
249	movups	32(%rbx,%rdx,4), %xmm0
250	movaps	%xmm0, -96(%rbp)        # 16-byte Spill
251	movups	48(%rbx,%rdx,4), %xmm0
252	movaps	%xmm0, -112(%rbp)       # 16-byte Spill
253	movups	(%rbx,%rsi,4), %xmm11
254	movups	16(%rbx,%rsi,4), %xmm0
255	movaps	%xmm0, -160(%rbp)       # 16-byte Spill
256	movups	32(%rbx,%rsi,4), %xmm12
257	movups	48(%rbx,%rsi,4), %xmm0
258	movaps	%xmm0, -128(%rbp)       # 16-byte Spill
259	movups	(%rbx,%rcx,4), %xmm9
260	movups	16(%rbx,%rcx,4), %xmm13
261	movups	32(%rbx,%rcx,4), %xmm2
262	movups	48(%rbx,%rcx,4), %xmm3
263	movq	%r9, %rbx
# Zero the inner counter with the xor idiom.  EFLAGS are dead here: the
# next flag reader is the jne after "cmpq $64, %r13" at the loop bottom.
264	xorl	%r13d, %r13d
265	.p2align	4, 0x90
266.LBB2_5:                                # %vector.ph
267                                        #   Parent Loop BB2_1 Depth=1
268                                        #     Parent Loop BB2_2 Depth=2
269                                        #       Parent Loop BB2_3 Depth=3
270                                        #         Parent Loop BB2_4 Depth=4
271                                        # =>        This Inner Loop Header: Depth=5
272	movaps	%xmm12, -240(%rbp)      # 16-byte Spill
273	movaps	%xmm2, -256(%rbp)       # 16-byte Spill
274	movaps	%xmm3, -272(%rbp)       # 16-byte Spill
# --- Group 1: accumulators xmm8, -144(%rbp), xmm6, xmm1; B at -192..-144(%rbx)
275	movaps	%xmm8, %xmm10
276	movaps	-144(%rbp), %xmm7       # 16-byte Reload
277	unpcklps	%xmm7, %xmm10   # xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
278	movaps	%xmm1, %xmm4
279	shufps	$0, %xmm6, %xmm4        # xmm4 = xmm4[0,0],xmm6[0,0]
280	shufps	$36, %xmm4, %xmm10      # xmm10 = xmm10[0,1],xmm4[2,0]
281	movaps	%xmm7, %xmm5
282	shufps	$17, %xmm8, %xmm5       # xmm5 = xmm5[1,0],xmm8[1,0]
283	movaps	%xmm6, %xmm4
284	unpcklps	%xmm1, %xmm4    # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
285	shufps	$226, %xmm4, %xmm5      # xmm5 = xmm5[2,0],xmm4[2,3]
286	movaps	%xmm8, %xmm12
287	unpckhps	%xmm7, %xmm12   # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
288	movaps	%xmm1, %xmm4
289	shufps	$34, %xmm6, %xmm4       # xmm4 = xmm4[2,0],xmm6[2,0]
290	shufps	$36, %xmm4, %xmm12      # xmm12 = xmm12[0,1],xmm4[2,0]
291	shufps	$51, %xmm8, %xmm7       # xmm7 = xmm7[3,0],xmm8[3,0]
292	unpckhps	%xmm1, %xmm6    # xmm6 = xmm6[2],xmm1[2],xmm6[3],xmm1[3]
293	shufps	$226, %xmm6, %xmm7      # xmm7 = xmm7[2,0],xmm6[2,3]
294	movaps	-160(%rbx), %xmm0
295	movaps	-144(%rbx), %xmm1
296	movaps	%xmm1, %xmm6
297	shufps	$0, %xmm0, %xmm6        # xmm6 = xmm6[0,0],xmm0[0,0]
298	movaps	-192(%rbx), %xmm3
299	movaps	-176(%rbx), %xmm4
300	movaps	%xmm3, %xmm8
301	unpcklps	%xmm4, %xmm8    # xmm8 = xmm8[0],xmm4[0],xmm8[1],xmm4[1]
302	shufps	$36, %xmm6, %xmm8       # xmm8 = xmm8[0,1],xmm6[2,0]
303	movaps	%xmm0, %xmm2
304	unpcklps	%xmm1, %xmm2    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
305	movaps	%xmm4, %xmm6
306	shufps	$17, %xmm3, %xmm6       # xmm6 = xmm6[1,0],xmm3[1,0]
307	shufps	$226, %xmm2, %xmm6      # xmm6 = xmm6[2,0],xmm2[2,3]
308	movaps	%xmm1, %xmm2
309	shufps	$34, %xmm0, %xmm2       # xmm2 = xmm2[2,0],xmm0[2,0]
310	movaps	%xmm3, %xmm14
311	unpckhps	%xmm4, %xmm14   # xmm14 = xmm14[2],xmm4[2],xmm14[3],xmm4[3]
312	shufps	$36, %xmm2, %xmm14      # xmm14 = xmm14[0,1],xmm2[2,0]
313	unpckhps	%xmm1, %xmm0    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
314	shufps	$51, %xmm3, %xmm4       # xmm4 = xmm4[3,0],xmm3[3,0]
315	shufps	$226, %xmm0, %xmm4      # xmm4 = xmm4[2,0],xmm0[2,3]
# Broadcast the A element for this pass; the copy in xmm5 is spilled to
# -224(%rbp) below and reused by groups 2-4.
316	movss	(%r12,%r13,4), %xmm0    # xmm0 = mem[0],zero,zero,zero
317	shufps	$0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
318	mulps	%xmm0, %xmm8
319	addps	%xmm10, %xmm8
320	mulps	%xmm0, %xmm6
321	addps	%xmm5, %xmm6
322	mulps	%xmm0, %xmm14
323	addps	%xmm12, %xmm14
324	mulps	%xmm0, %xmm4
325	movaps	%xmm0, %xmm5
326	addps	%xmm7, %xmm4
327	movaps	%xmm14, %xmm0
328	unpckhps	%xmm4, %xmm0    # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
329	movaps	%xmm6, %xmm1
330	shufps	$51, %xmm8, %xmm1       # xmm1 = xmm1[3,0],xmm8[3,0]
331	shufps	$226, %xmm0, %xmm1      # xmm1 = xmm1[2,0],xmm0[2,3]
332	movaps	%xmm1, -304(%rbp)       # 16-byte Spill
333	movaps	%xmm4, %xmm0
334	shufps	$34, %xmm14, %xmm0      # xmm0 = xmm0[2,0],xmm14[2,0]
335	movaps	%xmm8, %xmm1
336	unpckhps	%xmm6, %xmm1    # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
337	shufps	$36, %xmm0, %xmm1       # xmm1 = xmm1[0,1],xmm0[2,0]
338	movaps	%xmm1, -288(%rbp)       # 16-byte Spill
339	movaps	%xmm14, %xmm0
340	unpcklps	%xmm4, %xmm0    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
341	movaps	%xmm6, %xmm1
342	shufps	$17, %xmm8, %xmm1       # xmm1 = xmm1[1,0],xmm8[1,0]
343	shufps	$226, %xmm0, %xmm1      # xmm1 = xmm1[2,0],xmm0[2,3]
344	movaps	%xmm1, -144(%rbp)       # 16-byte Spill
345	shufps	$0, %xmm14, %xmm4       # xmm4 = xmm4[0,0],xmm14[0,0]
346	unpcklps	%xmm6, %xmm8    # xmm8 = xmm8[0],xmm6[0],xmm8[1],xmm6[1]
347	shufps	$36, %xmm4, %xmm8       # xmm8 = xmm8[0,1],xmm4[2,0]
# --- Group 2: accumulators xmm15, -64/-96/-112(%rbp); B at -128..-80(%rbx)
348	movaps	%xmm15, %xmm14
349	movaps	-64(%rbp), %xmm4        # 16-byte Reload
350	unpcklps	%xmm4, %xmm14   # xmm14 = xmm14[0],xmm4[0],xmm14[1],xmm4[1]
351	movaps	-112(%rbp), %xmm1       # 16-byte Reload
352	movaps	%xmm1, %xmm0
353	movaps	-96(%rbp), %xmm3        # 16-byte Reload
354	shufps	$0, %xmm3, %xmm0        # xmm0 = xmm0[0,0],xmm3[0,0]
355	shufps	$36, %xmm0, %xmm14      # xmm14 = xmm14[0,1],xmm0[2,0]
356	movaps	%xmm4, %xmm12
357	shufps	$17, %xmm15, %xmm12     # xmm12 = xmm12[1,0],xmm15[1,0]
358	movaps	%xmm3, %xmm2
359	unpcklps	%xmm1, %xmm2    # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
360	shufps	$226, %xmm2, %xmm12     # xmm12 = xmm12[2,0],xmm2[2,3]
361	movaps	%xmm15, %xmm7
362	unpckhps	%xmm4, %xmm7    # xmm7 = xmm7[2],xmm4[2],xmm7[3],xmm4[3]
363	movaps	%xmm1, %xmm2
364	shufps	$34, %xmm3, %xmm2       # xmm2 = xmm2[2,0],xmm3[2,0]
365	shufps	$36, %xmm2, %xmm7       # xmm7 = xmm7[0,1],xmm2[2,0]
366	shufps	$51, %xmm15, %xmm4      # xmm4 = xmm4[3,0],xmm15[3,0]
367	unpckhps	%xmm1, %xmm3    # xmm3 = xmm3[2],xmm1[2],xmm3[3],xmm1[3]
368	shufps	$226, %xmm3, %xmm4      # xmm4 = xmm4[2,0],xmm3[2,3]
369	movaps	%xmm4, -64(%rbp)        # 16-byte Spill
370	movaps	-96(%rbx), %xmm2
371	movaps	-80(%rbx), %xmm1
372	movaps	%xmm1, %xmm4
373	shufps	$0, %xmm2, %xmm4        # xmm4 = xmm4[0,0],xmm2[0,0]
374	movaps	-112(%rbx), %xmm10
375	movaps	-128(%rbx), %xmm0
376	movaps	%xmm0, %xmm15
377	unpcklps	%xmm10, %xmm15  # xmm15 = xmm15[0],xmm10[0],xmm15[1],xmm10[1]
378	shufps	$36, %xmm4, %xmm15      # xmm15 = xmm15[0,1],xmm4[2,0]
379	movaps	%xmm2, %xmm4
380	unpcklps	%xmm1, %xmm4    # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
381	movaps	%xmm10, %xmm6
382	shufps	$17, %xmm0, %xmm6       # xmm6 = xmm6[1,0],xmm0[1,0]
383	shufps	$226, %xmm4, %xmm6      # xmm6 = xmm6[2,0],xmm4[2,3]
384	movaps	%xmm1, %xmm3
385	shufps	$34, %xmm2, %xmm3       # xmm3 = xmm3[2,0],xmm2[2,0]
386	movaps	%xmm0, %xmm4
387	unpckhps	%xmm10, %xmm4   # xmm4 = xmm4[2],xmm10[2],xmm4[3],xmm10[3]
388	shufps	$36, %xmm3, %xmm4       # xmm4 = xmm4[0,1],xmm3[2,0]
389	unpckhps	%xmm1, %xmm2    # xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
390	shufps	$51, %xmm0, %xmm10      # xmm10 = xmm10[3,0],xmm0[3,0]
391	shufps	$226, %xmm2, %xmm10     # xmm10 = xmm10[2,0],xmm2[2,3]
392	movaps	%xmm5, -224(%rbp)       # 16-byte Spill
393	mulps	%xmm5, %xmm15
394	addps	%xmm14, %xmm15
395	mulps	%xmm5, %xmm6
396	addps	%xmm12, %xmm6
397	mulps	%xmm5, %xmm4
398	addps	%xmm7, %xmm4
399	mulps	%xmm5, %xmm10
400	addps	-64(%rbp), %xmm10       # 16-byte Folded Reload
401	movaps	%xmm4, %xmm0
402	unpckhps	%xmm10, %xmm0   # xmm0 = xmm0[2],xmm10[2],xmm0[3],xmm10[3]
403	movaps	%xmm6, %xmm1
404	shufps	$51, %xmm15, %xmm1      # xmm1 = xmm1[3,0],xmm15[3,0]
405	shufps	$226, %xmm0, %xmm1      # xmm1 = xmm1[2,0],xmm0[2,3]
406	movaps	%xmm1, -112(%rbp)       # 16-byte Spill
407	movaps	%xmm10, %xmm0
408	shufps	$34, %xmm4, %xmm0       # xmm0 = xmm0[2,0],xmm4[2,0]
409	movaps	%xmm15, %xmm1
410	unpckhps	%xmm6, %xmm1    # xmm1 = xmm1[2],xmm6[2],xmm1[3],xmm6[3]
411	shufps	$36, %xmm0, %xmm1       # xmm1 = xmm1[0,1],xmm0[2,0]
412	movaps	%xmm1, -96(%rbp)        # 16-byte Spill
413	movaps	%xmm4, %xmm0
414	unpcklps	%xmm10, %xmm0   # xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
415	movaps	%xmm6, %xmm1
416	shufps	$17, %xmm15, %xmm1      # xmm1 = xmm1[1,0],xmm15[1,0]
417	shufps	$226, %xmm0, %xmm1      # xmm1 = xmm1[2,0],xmm0[2,3]
418	movaps	%xmm1, -64(%rbp)        # 16-byte Spill
419	shufps	$0, %xmm4, %xmm10       # xmm10 = xmm10[0,0],xmm4[0,0]
420	unpcklps	%xmm6, %xmm15   # xmm15 = xmm15[0],xmm6[0],xmm15[1],xmm6[1]
421	shufps	$36, %xmm10, %xmm15     # xmm15 = xmm15[0,1],xmm10[2,0]
# --- Group 3: accumulators xmm11, -160(%rbp), xmm12 (via -240), -128(%rbp);
#     B at -64..-16(%rbx)
422	movaps	%xmm11, %xmm10
423	movaps	-160(%rbp), %xmm14      # 16-byte Reload
424	unpcklps	%xmm14, %xmm10  # xmm10 = xmm10[0],xmm14[0],xmm10[1],xmm14[1]
425	movaps	-128(%rbp), %xmm2       # 16-byte Reload
426	movaps	%xmm2, %xmm0
427	movaps	-240(%rbp), %xmm3       # 16-byte Reload
428	shufps	$0, %xmm3, %xmm0        # xmm0 = xmm0[0,0],xmm3[0,0]
429	shufps	$36, %xmm0, %xmm10      # xmm10 = xmm10[0,1],xmm0[2,0]
430	movaps	%xmm14, %xmm12
431	shufps	$17, %xmm11, %xmm12     # xmm12 = xmm12[1,0],xmm11[1,0]
432	movaps	%xmm3, %xmm0
433	unpcklps	%xmm2, %xmm0    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
434	shufps	$226, %xmm0, %xmm12     # xmm12 = xmm12[2,0],xmm0[2,3]
435	movaps	%xmm11, %xmm0
436	unpckhps	%xmm14, %xmm0   # xmm0 = xmm0[2],xmm14[2],xmm0[3],xmm14[3]
437	movaps	%xmm2, %xmm1
438	shufps	$34, %xmm3, %xmm1       # xmm1 = xmm1[2,0],xmm3[2,0]
439	shufps	$36, %xmm1, %xmm0       # xmm0 = xmm0[0,1],xmm1[2,0]
440	shufps	$51, %xmm11, %xmm14     # xmm14 = xmm14[3,0],xmm11[3,0]
441	unpckhps	%xmm2, %xmm3    # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
442	shufps	$226, %xmm3, %xmm14     # xmm14 = xmm14[2,0],xmm3[2,3]
443	movaps	-32(%rbx), %xmm1
444	movaps	-16(%rbx), %xmm2
445	movaps	%xmm2, %xmm3
446	shufps	$0, %xmm1, %xmm3        # xmm3 = xmm3[0,0],xmm1[0,0]
447	movaps	-48(%rbx), %xmm4
448	movaps	-64(%rbx), %xmm5
449	movaps	%xmm5, %xmm11
450	unpcklps	%xmm4, %xmm11   # xmm11 = xmm11[0],xmm4[0],xmm11[1],xmm4[1]
451	shufps	$36, %xmm3, %xmm11      # xmm11 = xmm11[0,1],xmm3[2,0]
452	movaps	%xmm1, %xmm3
453	unpcklps	%xmm2, %xmm3    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
454	movaps	%xmm4, %xmm7
455	shufps	$17, %xmm5, %xmm7       # xmm7 = xmm7[1,0],xmm5[1,0]
456	shufps	$226, %xmm3, %xmm7      # xmm7 = xmm7[2,0],xmm3[2,3]
457	movaps	%xmm2, %xmm3
458	shufps	$34, %xmm1, %xmm3       # xmm3 = xmm3[2,0],xmm1[2,0]
459	movaps	%xmm5, %xmm6
460	unpckhps	%xmm4, %xmm6    # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
461	shufps	$36, %xmm3, %xmm6       # xmm6 = xmm6[0,1],xmm3[2,0]
462	unpckhps	%xmm2, %xmm1    # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
463	shufps	$51, %xmm5, %xmm4       # xmm4 = xmm4[3,0],xmm5[3,0]
464	shufps	$226, %xmm1, %xmm4      # xmm4 = xmm4[2,0],xmm1[2,3]
465	movaps	-224(%rbp), %xmm1       # 16-byte Reload
466	mulps	%xmm1, %xmm11
467	addps	%xmm10, %xmm11
468	mulps	%xmm1, %xmm7
469	addps	%xmm12, %xmm7
470	mulps	%xmm1, %xmm6
471	addps	%xmm0, %xmm6
472	mulps	%xmm1, %xmm4
473	addps	%xmm14, %xmm4
474	movaps	%xmm6, %xmm0
475	unpckhps	%xmm4, %xmm0    # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
476	movaps	%xmm7, %xmm1
477	shufps	$51, %xmm11, %xmm1      # xmm1 = xmm1[3,0],xmm11[3,0]
478	shufps	$226, %xmm0, %xmm1      # xmm1 = xmm1[2,0],xmm0[2,3]
479	movaps	%xmm1, -128(%rbp)       # 16-byte Spill
480	movaps	%xmm4, %xmm0
481	shufps	$34, %xmm6, %xmm0       # xmm0 = xmm0[2,0],xmm6[2,0]
482	movaps	%xmm11, %xmm12
483	unpckhps	%xmm7, %xmm12   # xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
484	shufps	$36, %xmm0, %xmm12      # xmm12 = xmm12[0,1],xmm0[2,0]
485	movaps	%xmm6, %xmm0
486	unpcklps	%xmm4, %xmm0    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
487	movaps	%xmm7, %xmm1
488	shufps	$17, %xmm11, %xmm1      # xmm1 = xmm1[1,0],xmm11[1,0]
489	shufps	$226, %xmm0, %xmm1      # xmm1 = xmm1[2,0],xmm0[2,3]
490	movaps	%xmm1, -160(%rbp)       # 16-byte Spill
491	shufps	$0, %xmm6, %xmm4        # xmm4 = xmm4[0,0],xmm6[0,0]
492	unpcklps	%xmm7, %xmm11   # xmm11 = xmm11[0],xmm7[0],xmm11[1],xmm7[1]
493	shufps	$36, %xmm4, %xmm11      # xmm11 = xmm11[0,1],xmm4[2,0]
# --- Group 4: accumulators xmm9, xmm13, xmm2 (via -256), xmm3 (via -272);
#     B at 0..48(%rbx)
494	movaps	%xmm9, %xmm10
495	unpcklps	%xmm13, %xmm10  # xmm10 = xmm10[0],xmm13[0],xmm10[1],xmm13[1]
496	movaps	-272(%rbp), %xmm2       # 16-byte Reload
497	movaps	%xmm2, %xmm0
498	movaps	-256(%rbp), %xmm3       # 16-byte Reload
499	shufps	$0, %xmm3, %xmm0        # xmm0 = xmm0[0,0],xmm3[0,0]
500	shufps	$36, %xmm0, %xmm10      # xmm10 = xmm10[0,1],xmm0[2,0]
501	movaps	%xmm13, %xmm14
502	shufps	$17, %xmm9, %xmm14      # xmm14 = xmm14[1,0],xmm9[1,0]
503	movaps	%xmm3, %xmm0
504	unpcklps	%xmm2, %xmm0    # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
505	shufps	$226, %xmm0, %xmm14     # xmm14 = xmm14[2,0],xmm0[2,3]
506	movaps	%xmm9, %xmm0
507	unpckhps	%xmm13, %xmm0   # xmm0 = xmm0[2],xmm13[2],xmm0[3],xmm13[3]
508	movaps	%xmm2, %xmm1
509	shufps	$34, %xmm3, %xmm1       # xmm1 = xmm1[2,0],xmm3[2,0]
510	shufps	$36, %xmm1, %xmm0       # xmm0 = xmm0[0,1],xmm1[2,0]
511	shufps	$51, %xmm9, %xmm13      # xmm13 = xmm13[3,0],xmm9[3,0]
512	unpckhps	%xmm2, %xmm3    # xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
513	shufps	$226, %xmm3, %xmm13     # xmm13 = xmm13[2,0],xmm3[2,3]
514	movaps	32(%rbx), %xmm1
515	movaps	48(%rbx), %xmm2
516	movaps	%xmm2, %xmm3
517	shufps	$0, %xmm1, %xmm3        # xmm3 = xmm3[0,0],xmm1[0,0]
518	movaps	16(%rbx), %xmm4
519	movaps	(%rbx), %xmm5
520	movaps	%xmm5, %xmm9
521	unpcklps	%xmm4, %xmm9    # xmm9 = xmm9[0],xmm4[0],xmm9[1],xmm4[1]
522	shufps	$36, %xmm3, %xmm9       # xmm9 = xmm9[0,1],xmm3[2,0]
523	movaps	%xmm1, %xmm3
524	unpcklps	%xmm2, %xmm3    # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
525	movaps	%xmm4, %xmm7
526	shufps	$17, %xmm5, %xmm7       # xmm7 = xmm7[1,0],xmm5[1,0]
527	shufps	$226, %xmm3, %xmm7      # xmm7 = xmm7[2,0],xmm3[2,3]
528	movaps	%xmm2, %xmm3
529	shufps	$34, %xmm1, %xmm3       # xmm3 = xmm3[2,0],xmm1[2,0]
530	movaps	%xmm5, %xmm6
531	unpckhps	%xmm4, %xmm6    # xmm6 = xmm6[2],xmm4[2],xmm6[3],xmm4[3]
532	shufps	$36, %xmm3, %xmm6       # xmm6 = xmm6[0,1],xmm3[2,0]
533	unpckhps	%xmm2, %xmm1    # xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
534	shufps	$51, %xmm5, %xmm4       # xmm4 = xmm4[3,0],xmm5[3,0]
535	shufps	$226, %xmm1, %xmm4      # xmm4 = xmm4[2,0],xmm1[2,3]
536	movaps	-224(%rbp), %xmm1       # 16-byte Reload
537	mulps	%xmm1, %xmm9
538	addps	%xmm10, %xmm9
539	mulps	%xmm1, %xmm7
540	addps	%xmm14, %xmm7
541	mulps	%xmm1, %xmm6
542	addps	%xmm0, %xmm6
543	mulps	%xmm1, %xmm4
544	addps	%xmm13, %xmm4
545	movaps	%xmm6, %xmm0
546	unpckhps	%xmm4, %xmm0    # xmm0 = xmm0[2],xmm4[2],xmm0[3],xmm4[3]
547	movaps	%xmm7, %xmm3
548	shufps	$51, %xmm9, %xmm3       # xmm3 = xmm3[3,0],xmm9[3,0]
549	shufps	$226, %xmm0, %xmm3      # xmm3 = xmm3[2,0],xmm0[2,3]
550	movaps	%xmm4, %xmm0
551	shufps	$34, %xmm6, %xmm0       # xmm0 = xmm0[2,0],xmm6[2,0]
552	movaps	%xmm9, %xmm2
553	unpckhps	%xmm7, %xmm2    # xmm2 = xmm2[2],xmm7[2],xmm2[3],xmm7[3]
554	shufps	$36, %xmm0, %xmm2       # xmm2 = xmm2[0,1],xmm0[2,0]
555	movaps	%xmm6, %xmm0
556	unpcklps	%xmm4, %xmm0    # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
557	movaps	%xmm7, %xmm13
558	shufps	$17, %xmm9, %xmm13      # xmm13 = xmm13[1,0],xmm9[1,0]
559	shufps	$226, %xmm0, %xmm13     # xmm13 = xmm13[2,0],xmm0[2,3]
560	shufps	$0, %xmm6, %xmm4        # xmm4 = xmm4[0,0],xmm6[0,0]
# Bring group 1's third and fourth accumulators back into xmm6/xmm1 so
# the register assignment matches the loop entry (and the stores below).
561	movaps	-288(%rbp), %xmm6       # 16-byte Reload
562	movaps	-304(%rbp), %xmm1       # 16-byte Reload
563	unpcklps	%xmm7, %xmm9    # xmm9 = xmm9[0],xmm7[0],xmm9[1],xmm7[1]
564	shufps	$36, %xmm4, %xmm9       # xmm9 = xmm9[0,1],xmm4[2,0]
565	addq	$1, %r13
566	addq	$6144, %rbx             # imm = 0x1800
567	cmpq	$64, %r13
568	jne	.LBB2_5
569# %bb.6:                                # %polly.loop_exit34
570                                        #   in Loop: Header=BB2_4 Depth=4
# Write the 16 accumulators back to the row of C.
571	movups	%xmm8, (%r8)
572	movaps	-144(%rbp), %xmm0       # 16-byte Reload
573	movups	%xmm0, 16(%r8)
574	movups	%xmm6, 32(%r8)
575	movups	%xmm1, 48(%r8)
576	movaps	-112(%rbp), %xmm0       # 16-byte Reload
577	movups	%xmm0, 48(%r15)
578	movaps	-96(%rbp), %xmm0        # 16-byte Reload
579	movups	%xmm0, 32(%r15)
580	movaps	-64(%rbp), %xmm0        # 16-byte Reload
581	movups	%xmm0, 16(%r15)
582	movups	%xmm15, (%r15)
583	movaps	-128(%rbp), %xmm0       # 16-byte Reload
584	movups	%xmm0, 48(%r10)
585	movaps	-160(%rbp), %xmm0       # 16-byte Reload
586	movups	%xmm0, 16(%r10)
587	movups	%xmm11, (%r10)
588	movups	%xmm12, 32(%r10)
589	movups	%xmm3, 48(%r11)
590	movups	%xmm13, 16(%r11)
591	movups	%xmm9, (%r11)
592	movups	%xmm2, 32(%r11)
593	addq	$1, %r14
594	addq	$6144, %r12             # imm = 0x1800
595	cmpq	-80(%rbp), %r14         # 8-byte Folded Reload
596	jne	.LBB2_4
597# %bb.7:                                # %polly.loop_exit28
598                                        #   in Loop: Header=BB2_3 Depth=3
599	movq	-192(%rbp), %rax        # 8-byte Reload
600	addq	$64, %rax
601	addq	$393216, %r9            # imm = 0x60000
602	movq	-200(%rbp), %r12        # 8-byte Reload
603	addq	$256, %r12              # imm = 0x100
604	cmpq	$1536, %rax             # imm = 0x600
605	jb	.LBB2_3
606# %bb.8:                                # %polly.loop_exit22
607                                        #   in Loop: Header=BB2_2 Depth=2
608	movq	-168(%rbp), %rax        # 8-byte Reload
609	addq	$64, %rax
610	movq	-176(%rbp), %rdi        # 8-byte Reload
611	addq	$1, %rdi
612	movq	-184(%rbp), %r9         # 8-byte Reload
613	addq	$256, %r9               # imm = 0x100
614	cmpq	$1536, %rax             # imm = 0x600
615	jb	.LBB2_2
616# %bb.9:                                # %polly.loop_exit16
617                                        #   in Loop: Header=BB2_1 Depth=1
618	movq	-48(%rbp), %rax         # 8-byte Reload
619	movq	%rax, %rcx
620	addq	$64, %rcx
621	addq	$64, -80(%rbp)          # 8-byte Folded Spill
622	addq	$393216, -72(%rbp)      # 8-byte Folded Spill
623                                        # imm = 0x60000
624	movq	%rcx, %rax
625	movq	%rcx, -48(%rbp)         # 8-byte Spill
626	cmpq	$1536, %rcx             # imm = 0x600
627	jb	.LBB2_1
628# %bb.10:                               # %polly.exiting
629	xorl	%eax, %eax	# return 0
630	addq	$264, %rsp              # imm = 0x108
631	popq	%rbx
632	popq	%r12
633	popq	%r13
634	popq	%r14
635	popq	%r15
636	popq	%rbp
637	.cfi_def_cfa %rsp, 8
638	retq
639.Lfunc_end2:
640	.size	main, .Lfunc_end2-main
641	.cfi_endproc
642                                        # -- End function
# Global data.  A, B and C are common symbols of 9437184 bytes each,
# 16-byte aligned; that size equals 1536 rows of 6144 bytes, the row
# stride used by the code above.  .L.str is the NUL-terminated format
# string passed to fprintf by print_array (4 characters + NUL = 5 bytes).
643	.type	A,@object               # @A
644	.comm	A,9437184,16
645	.type	B,@object               # @B
646	.comm	B,9437184,16
647	.type	.L.str,@object          # @.str
648	.section	.rodata.str1.1,"aMS",@progbits,1
649.L.str:
650	.asciz	"%lf "
651	.size	.L.str, 5
652
653	.type	C,@object               # @C
654	.comm	C,9437184,16
655
656	.ident	"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
657	.section	".note.GNU-stack","",@progbits
658