# Code-viewer navigation residue (not part of the listing): Home, Line#, Scopes#, Navigate#, Raw, Download
1	.text
2	.file	"matmul.c"
3	.section	.rodata.cst8,"aM",@progbits,8
4	.p2align	3               # -- Begin function init_array
5.LCPI0_0:
6	.quad	4602678819172646912     # double 0.5
7	.text
8	.globl	init_array
9	.p2align	4, 0x90
10	.type	init_array,@function
11init_array:                             # @init_array
12	.cfi_startproc
13# %bb.0:                                # %entry
14	pushq	%rbp
15	.cfi_def_cfa_offset 16
16	.cfi_offset %rbp, -16
17	movq	%rsp, %rbp
18	.cfi_def_cfa_register %rbp
19	leaq	B(%rip), %rax
20	leaq	A(%rip), %rcx
21	xorl	%r8d, %r8d
22	movsd	.LCPI0_0(%rip), %xmm0   # xmm0 = mem[0],zero
23	xorl	%r9d, %r9d
24	.p2align	4, 0x90
25.LBB0_1:                                # %polly.loop_header
26                                        # =>This Loop Header: Depth=1
27                                        #     Child Loop BB0_2 Depth 2
28	movl	$1, %edi
29	xorl	%edx, %edx
30	.p2align	4, 0x90
31.LBB0_2:                                # %polly.loop_header1
32                                        #   Parent Loop BB0_1 Depth=1
33                                        # =>  This Inner Loop Header: Depth=2
34	movl	%edx, %esi
35	andl	$1022, %esi             # imm = 0x3FE
36	orl	$1, %esi
37	xorps	%xmm1, %xmm1
38	cvtsi2sdl	%esi, %xmm1
39	mulsd	%xmm0, %xmm1
40	cvtsd2ss	%xmm1, %xmm1
41	movss	%xmm1, -4(%rcx,%rdi,4)
42	movss	%xmm1, -4(%rax,%rdi,4)
43	leal	(%r9,%rdx), %esi
44	andl	$1023, %esi             # imm = 0x3FF
45	addl	$1, %esi
46	xorps	%xmm1, %xmm1
47	cvtsi2sdl	%esi, %xmm1
48	mulsd	%xmm0, %xmm1
49	cvtsd2ss	%xmm1, %xmm1
50	movss	%xmm1, (%rcx,%rdi,4)
51	movss	%xmm1, (%rax,%rdi,4)
52	addq	$2, %rdi
53	addl	%r8d, %edx
54	cmpq	$1537, %rdi             # imm = 0x601
55	jne	.LBB0_2
56# %bb.3:                                # %polly.loop_exit3
57                                        #   in Loop: Header=BB0_1 Depth=1
58	addq	$1, %r9
59	addq	$6144, %rax             # imm = 0x1800
60	addq	$6144, %rcx             # imm = 0x1800
61	addl	$2, %r8d
62	cmpq	$1536, %r9              # imm = 0x600
63	jne	.LBB0_1
64# %bb.4:                                # %polly.exiting
65	popq	%rbp
66	.cfi_def_cfa %rsp, 8
67	retq
68.Lfunc_end0:
69	.size	init_array, .Lfunc_end0-init_array
70	.cfi_endproc
71                                        # -- End function
#-----------------------------------------------------------------------
# print_array -- write every element of the global float array C to
# stdout.
#
# ABI:     System V AMD64 (arguments in %rdi/%rsi, %al = number of
#          vector registers used by a variadic call).
# Inputs:  none; no argument register is read.
# Output:  none; no return value is set up.
# Effect:  for each of the 1536 rows of C (6144 bytes apart) and each
#          element j in [0, 1536) of that row:
#              fprintf(stdout, "%lf ", (double)C[row][j]);
#              if (j % 80 == 79) fputc('\n', stdout);
#          and fputc('\n', stdout) once more after every row.
# Regs:    kept in callee-saved registers so they survive the calls:
#          %r13  = address of the current row of C
#          %rbx  = j
#          %r15d = (j / 80) * 80 + 79, compared against j after fprintf
#          %r12  = 0xCCCCCCCD, reciprocal used for the divide by 80
#          %r14  = address of the format string .L.str
# Stack:   the row counter lives at rbp-48 across the calls.  That slot
#          is the one created by `pushq %rax`, which also leaves %rsp
#          16-byte aligned at every call site.
#-----------------------------------------------------------------------
	.globl	print_array             # -- Begin function print_array
	.p2align	4, 0x90
	.type	print_array,@function
print_array:                            # @print_array
	.cfi_startproc
# %bb.0:                                # %entry
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	pushq	%rax                    # spill slot + stack alignment
	.cfi_offset %rbx, -56
	.cfi_offset %r12, -48
	.cfi_offset %r13, -40
	.cfi_offset %r14, -32
	.cfi_offset %r15, -24
	leaq	C(%rip), %r13
	xorl	%eax, %eax
	movl	$3435973837, %r12d      # imm = 0xCCCCCCCD
	leaq	.L.str(%rip), %r14
	.p2align	4, 0x90
.LBB1_1:                                # %for.cond1.preheader
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB1_2 Depth 2
	movq	%rax, -48(%rbp)         # 8-byte Spill
	movq	stdout(%rip), %rsi
	xorl	%ebx, %ebx
	.p2align	4, 0x90
.LBB1_2:                                # %for.body3
                                        #   Parent Loop BB1_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
# (j * 0xCCCCCCCD) >> 38 is j / 80 for a 32-bit unsigned j; the lea and
# shift then multiply the quotient by 5 and by 16, i.e. by 80.
	movl	%ebx, %eax
	imulq	%r12, %rax
	shrq	$38, %rax
	leal	(%rax,%rax,4), %r15d
	shll	$4, %r15d
	addl	$79, %r15d
	movss	(%r13,%rbx,4), %xmm0    # xmm0 = mem[0],zero,zero,zero
	cvtss2sd	%xmm0, %xmm0
	movb	$1, %al                 # one vector register carries an argument
	movq	%rsi, %rdi              # stream = stdout (loaded into %rsi earlier)
	movq	%r14, %rsi              # format = "%lf "
	callq	fprintf
	cmpl	%ebx, %r15d             # j == (j / 80) * 80 + 79 ?
	jne	.LBB1_4
# %bb.3:                                # %if.then
                                        #   in Loop: Header=BB1_2 Depth=2
	movq	stdout(%rip), %rsi
	movl	$10, %edi
	callq	fputc@PLT
.LBB1_4:                                # %for.inc
                                        #   in Loop: Header=BB1_2 Depth=2
	addq	$1, %rbx
	movq	stdout(%rip), %rsi      # reloaded after each call
	cmpq	$1536, %rbx             # imm = 0x600
	jne	.LBB1_2
# %bb.5:                                # %for.end
                                        #   in Loop: Header=BB1_1 Depth=1
	movl	$10, %edi
	callq	fputc@PLT
	movq	-48(%rbp), %rax         # 8-byte Reload
	addq	$1, %rax
	addq	$6144, %r13             # imm = 0x1800
	cmpq	$1536, %rax             # imm = 0x600
	jne	.LBB1_1
# %bb.6:                                # %for.end12
	addq	$8, %rsp                # drop the slot made by pushq %rax
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	.cfi_def_cfa %rsp, 8
	retq
.Lfunc_end1:
	.size	print_array, .Lfunc_end1-print_array
	.cfi_endproc
                                        # -- End function
#-----------------------------------------------------------------------
# main -- initialise A and B, zero C, then accumulate C += A * B.
#
# ABI:     System V AMD64.  Returns 0 in %eax.  print_array is not
#          called from this routine.
# Steps:   init_array(); memset(C, 0, 9437184); then the update
#              C[i][j] += A[i][k] * B[k][j]
#          for i, j, k in [0, 1536) on single-precision elements, with
#          all three index ranges tiled by 64 and 64 columns of one row
#          of C (sixteen 4-float vectors) held in accumulators at a time.
# Loops:   .LBB2_1   ii = 0, 64, ... below 1536   row tile of C and A
#          .LBB2_2   jj = 0, 64, ... below 1536   column tile of C and B
#          .LBB2_3   kk = 0, 64, ... below 1536   tile of the summed index
#          .LBB2_4   i  = ii .. ii + 63
#          .LBB2_5   k  = kk .. kk + 63           innermost loop
# Frame:   offsets are relative to %rbp.
#          rbp-48    ii                  rbp-64    ii + 64 (row tile end)
#          rbp-56    address of A[ii][0]
#          rbp-272   jj                  rbp-136   address of B[0][jj] + 240
#          rbp-144   kk                  rbp-152   address of A[ii][kk]
#          rbp-72    address of B[kk][jj] + 240
#          rbp-376   i
#          rbp-80, rbp-264 .. rbp-160    column indices jj+4, jj+8 .. jj+60
#          rbp-368 .. rbp-280            addresses of C[i][jj + 4m], m = 0 .. 11
#          rbp-96, rbp-112, rbp-128      spilled accumulators for columns
#                                        jj+52, jj+56 and jj+60
# Notes:   movaps needs 16-byte aligned operands: the arrays are declared
#          with 16-byte alignment, rows are 6144 bytes and every column
#          offset used is a multiple of four floats; %rbp is 16-byte
#          aligned, which covers the three accumulator spill slots.
#-----------------------------------------------------------------------
	.globl	main                    # -- Begin function main
	.p2align	4, 0x90
	.type	main,@function
main:                                   # @main
	.cfi_startproc
# %bb.0:                                # %entry
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
	pushq	%r15
	pushq	%r14
	pushq	%r13
	pushq	%r12
	pushq	%rbx
	subq	$344, %rsp              # imm = 0x158
	.cfi_offset %rbx, -56
	.cfi_offset %r12, -48
	.cfi_offset %r13, -40
	.cfi_offset %r14, -32
	.cfi_offset %r15, -24
	callq	init_array
	leaq	C(%rip), %rdi
	xorl	%eax, %eax
	movq	%rax, -48(%rbp)         # 8-byte Spill
	xorl	%esi, %esi
	movl	$9437184, %edx          # imm = 0x900000
	callq	memset@PLT
	movl	$64, %eax
	movq	%rax, -64(%rbp)         # 8-byte Spill
	leaq	A(%rip), %rax
	movq	%rax, -56(%rbp)         # 8-byte Spill
	.p2align	4, 0x90
.LBB2_1:                                # %polly.loop_header8
                                        # =>This Loop Header: Depth=1
                                        #     Child Loop BB2_2 Depth 2
                                        #       Child Loop BB2_3 Depth 3
                                        #         Child Loop BB2_4 Depth 4
                                        #           Child Loop BB2_5 Depth 5
# The B pointer is biased by 240 bytes so that the innermost loop can
# address the sixteen vectors of a row segment as -240(%r9) .. (%r9).
	leaq	B+240(%rip), %rax
	xorl	%edi, %edi
	.p2align	4, 0x90
.LBB2_2:                                # %polly.loop_header14
                                        #   Parent Loop BB2_1 Depth=1
                                        # =>  This Loop Header: Depth=2
                                        #       Child Loop BB2_3 Depth 3
                                        #         Child Loop BB2_4 Depth 4
                                        #           Child Loop BB2_5 Depth 5
# Precompute the column indices jj+4 .. jj+60.  jj is a multiple of 64,
# so OR-ing in a value below 64 is the same as adding it.
	movq	%rdi, %rcx
	orq	$4, %rcx
	movq	%rcx, -80(%rbp)         # 8-byte Spill
	movq	%rdi, %rcx
	orq	$8, %rcx
	movq	%rcx, -264(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$12, %rcx
	movq	%rcx, -256(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$16, %rcx
	movq	%rcx, -248(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$20, %rcx
	movq	%rcx, -240(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$24, %rcx
	movq	%rcx, -232(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$28, %rcx
	movq	%rcx, -224(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$32, %rcx
	movq	%rcx, -216(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$36, %rcx
	movq	%rcx, -208(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$40, %rcx
	movq	%rcx, -200(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$44, %rcx
	movq	%rcx, -192(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$48, %rcx
	movq	%rcx, -184(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$52, %rcx
	movq	%rcx, -176(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$56, %rcx
	movq	%rcx, -168(%rbp)        # 8-byte Spill
	movq	%rdi, %rcx
	orq	$60, %rcx
	movq	%rcx, -160(%rbp)        # 8-byte Spill
	movq	-56(%rbp), %rdx         # 8-byte Reload
	movq	%rax, -136(%rbp)        # 8-byte Spill
	movq	%rax, -72(%rbp)         # 8-byte Spill
	xorl	%eax, %eax
	movq	%rdi, -272(%rbp)        # 8-byte Spill
	.p2align	4, 0x90
.LBB2_3:                                # %polly.loop_header20
                                        #   Parent Loop BB2_1 Depth=1
                                        #     Parent Loop BB2_2 Depth=2
                                        # =>    This Loop Header: Depth=3
                                        #         Child Loop BB2_4 Depth 4
                                        #           Child Loop BB2_5 Depth 5
# Here %rax = kk and %rdx = address of A[ii][kk]; both are saved and
# %rax is then reused as the row index i, starting at ii.
	movq	%rax, -144(%rbp)        # 8-byte Spill
	movq	%rdx, -152(%rbp)        # 8-byte Spill
	movq	-48(%rbp), %rax         # 8-byte Reload
	.p2align	4, 0x90
.LBB2_4:                                # %polly.loop_header26
                                        #   Parent Loop BB2_1 Depth=1
                                        #     Parent Loop BB2_2 Depth=2
                                        #       Parent Loop BB2_3 Depth=3
                                        # =>      This Loop Header: Depth=4
                                        #           Child Loop BB2_5 Depth 5
# %rax = i on entry.  i * 3 << 11 = i * 6144, the byte offset of row i,
# so %rax becomes the address of C[i][0].  The addresses of the first
# twelve 4-float groups of the tile are stored for the write-back.
	movq	%rax, -376(%rbp)        # 8-byte Spill
	leaq	(%rax,%rax,2), %rax
	shlq	$11, %rax
	leaq	C(%rip), %rsi
	addq	%rsi, %rax
	leaq	(%rax,%rdi,4), %rcx
	movq	%rcx, -368(%rbp)        # 8-byte Spill
	movq	-80(%rbp), %rcx         # 8-byte Reload
	leaq	(%rax,%rcx,4), %rcx
	movq	%rcx, -360(%rbp)        # 8-byte Spill
	movq	-264(%rbp), %rbx        # 8-byte Reload
	leaq	(%rax,%rbx,4), %rcx
	movq	%rcx, -352(%rbp)        # 8-byte Spill
	movq	-256(%rbp), %r8         # 8-byte Reload
	movq	%rdi, %rsi              # keep jj in %rsi; %rdi is reused below
	leaq	(%rax,%r8,4), %rdi
	movq	%rdi, -344(%rbp)        # 8-byte Spill
	movq	-248(%rbp), %rdi        # 8-byte Reload
	leaq	(%rax,%rdi,4), %rcx
	movq	%rcx, -336(%rbp)        # 8-byte Spill
	movq	-240(%rbp), %r9         # 8-byte Reload
	leaq	(%rax,%r9,4), %rcx
	movq	%rcx, -328(%rbp)        # 8-byte Spill
	movq	-232(%rbp), %r10        # 8-byte Reload
	leaq	(%rax,%r10,4), %rcx
	movq	%rcx, -320(%rbp)        # 8-byte Spill
	movq	-224(%rbp), %r14        # 8-byte Reload
	leaq	(%rax,%r14,4), %rcx
	movq	%rcx, -312(%rbp)        # 8-byte Spill
	movq	-216(%rbp), %r15        # 8-byte Reload
	leaq	(%rax,%r15,4), %rcx
	movq	%rcx, -304(%rbp)        # 8-byte Spill
	movq	-208(%rbp), %r12        # 8-byte Reload
	leaq	(%rax,%r12,4), %rcx
	movq	%rcx, -296(%rbp)        # 8-byte Spill
	movq	-200(%rbp), %r13        # 8-byte Reload
	leaq	(%rax,%r13,4), %rcx
	movq	%rcx, -288(%rbp)        # 8-byte Spill
	movq	-192(%rbp), %r11        # 8-byte Reload
	leaq	(%rax,%r11,4), %rcx
	movq	%rcx, -280(%rbp)        # 8-byte Spill
# Load the sixteen accumulators with the current C[i][jj .. jj+63]:
# xmm15 down to xmm3 hold columns jj .. jj+51, the last three groups
# live in the stack slots at rbp-96, rbp-112 and rbp-128.
	movaps	(%rax,%rsi,4), %xmm15
	movq	-80(%rbp), %rcx         # 8-byte Reload
	movaps	(%rax,%rcx,4), %xmm14
	movaps	(%rax,%rbx,4), %xmm13
	movaps	(%rax,%r8,4), %xmm12
	movaps	(%rax,%rdi,4), %xmm11
	movaps	(%rax,%r9,4), %xmm10
	movaps	(%rax,%r10,4), %xmm9
	movaps	(%rax,%r14,4), %xmm8
	movaps	(%rax,%r15,4), %xmm7
	movaps	(%rax,%r12,4), %xmm6
	movaps	(%rax,%r13,4), %xmm5
	movaps	(%rax,%r11,4), %xmm4
	movq	-184(%rbp), %rcx        # 8-byte Reload
	movaps	(%rax,%rcx,4), %xmm3
	movq	-176(%rbp), %rsi        # 8-byte Reload
	movaps	(%rax,%rsi,4), %xmm0
	movaps	%xmm0, -96(%rbp)        # 16-byte Spill
	movq	-168(%rbp), %rbx        # 8-byte Reload
	movaps	(%rax,%rbx,4), %xmm0
	movaps	%xmm0, -112(%rbp)       # 16-byte Spill
	movq	-160(%rbp), %rdi        # 8-byte Reload
	movaps	(%rax,%rdi,4), %xmm0
	movaps	%xmm0, -128(%rbp)       # 16-byte Spill
# Write-back addresses of the last four groups stay in registers:
# %r8 -> column jj+48, %rcx -> jj+52, %rsi -> jj+56, %rax -> jj+60.
	leaq	(%rax,%rcx,4), %r8
	leaq	(%rax,%rsi,4), %rcx
	leaq	(%rax,%rbx,4), %rsi
	leaq	(%rax,%rdi,4), %rax
	movq	-72(%rbp), %r9          # 8-byte Reload
	movl	$0, %r10d
	.p2align	4, 0x90
.LBB2_5:                                # %vector.ph
                                        #   Parent Loop BB2_1 Depth=1
                                        #     Parent Loop BB2_2 Depth=2
                                        #       Parent Loop BB2_3 Depth=3
                                        #         Parent Loop BB2_4 Depth=4
                                        # =>        This Inner Loop Header: Depth=5
# %rdx = address of A[i][kk], %r10 = k - kk, %r9 = address of
# B[k][jj] + 240.  A[i][k] is broadcast to all four lanes of xmm0 and
# multiplied into each of the sixteen row-segment vectors of B.
	movss	(%rdx,%r10,4), %xmm0    # xmm0 = mem[0],zero,zero,zero
	shufps	$0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
	movaps	-240(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm15
	movaps	-224(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm14
	movaps	-208(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm13
	movaps	-192(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm12
	movaps	-176(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm11
	movaps	-160(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm10
	movaps	-144(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm9
	movaps	-128(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm8
	movaps	-112(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm7
	movaps	-96(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm6
	movaps	-80(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm5
	movaps	-64(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm4
	movaps	-48(%r9), %xmm1
	mulps	%xmm0, %xmm1
	addps	%xmm1, %xmm3
# The last three accumulators do not fit in registers and are updated
# through their stack slots on every iteration.
	movaps	-32(%r9), %xmm1
	mulps	%xmm0, %xmm1
	movaps	-96(%rbp), %xmm2        # 16-byte Reload
	addps	%xmm1, %xmm2
	movaps	%xmm2, -96(%rbp)        # 16-byte Spill
	movaps	-16(%r9), %xmm1
	mulps	%xmm0, %xmm1
	movaps	-112(%rbp), %xmm2       # 16-byte Reload
	addps	%xmm1, %xmm2
	movaps	%xmm2, -112(%rbp)       # 16-byte Spill
	mulps	(%r9), %xmm0
	movaps	-128(%rbp), %xmm1       # 16-byte Reload
	addps	%xmm0, %xmm1
	movaps	%xmm1, -128(%rbp)       # 16-byte Spill
	addq	$1, %r10
	addq	$6144, %r9              # imm = 0x1800
	cmpq	$64, %r10
	jne	.LBB2_5
# %bb.6:                                # %polly.loop_exit34
                                        #   in Loop: Header=BB2_4 Depth=4
# Write the sixteen accumulators back to C[i][jj .. jj+63].
	movq	-368(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm15, (%rdi)
	movq	-360(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm14, (%rdi)
	movq	-352(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm13, (%rdi)
	movq	-344(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm12, (%rdi)
	movq	-336(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm11, (%rdi)
	movq	-328(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm10, (%rdi)
	movq	-320(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm9, (%rdi)
	movq	-312(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm8, (%rdi)
	movq	-304(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm7, (%rdi)
	movq	-296(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm6, (%rdi)
	movq	-288(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm5, (%rdi)
	movq	-280(%rbp), %rdi        # 8-byte Reload
	movaps	%xmm4, (%rdi)
	movaps	%xmm3, (%r8)
	movaps	-96(%rbp), %xmm0        # 16-byte Reload
	movaps	%xmm0, (%rcx)
	movaps	-112(%rbp), %xmm0       # 16-byte Reload
	movaps	%xmm0, (%rsi)
	movaps	-128(%rbp), %xmm0       # 16-byte Reload
	movaps	%xmm0, (%rax)
# Next row i: advance the A pointer by one row and stop at ii + 64.
# movq does not modify the flags, so the jne still tests the cmpq.
	movq	-376(%rbp), %rax        # 8-byte Reload
	addq	$1, %rax
	addq	$6144, %rdx             # imm = 0x1800
	cmpq	-64(%rbp), %rax         # 8-byte Folded Reload
	movq	-272(%rbp), %rdi        # 8-byte Reload
	jne	.LBB2_4
# %bb.7:                                # %polly.loop_exit28
                                        #   in Loop: Header=BB2_3 Depth=3
# Next kk tile: B moves down 64 rows (393216 bytes), A moves right by
# 64 floats (256 bytes) from the saved start of this tile.
	movq	-144(%rbp), %rax        # 8-byte Reload
	addq	$64, %rax
	addq	$393216, -72(%rbp)      # 8-byte Folded Spill
                                        # imm = 0x60000
	movq	-152(%rbp), %rdx        # 8-byte Reload
	addq	$256, %rdx              # imm = 0x100
	cmpq	$1536, %rax             # imm = 0x600
	jb	.LBB2_3
# %bb.8:                                # %polly.loop_exit22
                                        #   in Loop: Header=BB2_2 Depth=2
# Next jj tile: the B column pointer moves right by 64 floats.
	addq	$64, %rdi
	movq	-136(%rbp), %rax        # 8-byte Reload
	addq	$256, %rax              # imm = 0x100
	cmpq	$1536, %rdi             # imm = 0x600
	jb	.LBB2_2
# %bb.9:                                # %polly.loop_exit16
                                        #   in Loop: Header=BB2_1 Depth=1
# Next ii tile: advance the tile start, the tile end and the A row
# pointer (64 rows = 393216 bytes).
	movq	-48(%rbp), %rax         # 8-byte Reload
	movq	%rax, %rcx
	addq	$64, %rcx
	addq	$64, -64(%rbp)          # 8-byte Folded Spill
	addq	$393216, -56(%rbp)      # 8-byte Folded Spill
                                        # imm = 0x60000
	movq	%rcx, %rax
	movq	%rcx, -48(%rbp)         # 8-byte Spill
	cmpq	$1536, %rcx             # imm = 0x600
	jb	.LBB2_1
# %bb.10:                               # %polly.exiting
	xorl	%eax, %eax              # return 0
	addq	$344, %rsp              # imm = 0x158
	popq	%rbx
	popq	%r12
	popq	%r13
	popq	%r14
	popq	%r15
	popq	%rbp
	.cfi_def_cfa %rsp, 8
	retq
.Lfunc_end2:
	.size	main, .Lfunc_end2-main
	.cfi_endproc
                                        # -- End function
# Global data.  A, B and C are common symbols, each 9437184 bytes
# (1536 rows of 6144 bytes) with 16-byte alignment.  .L.str is the
# NUL-terminated fprintf format used by print_array (5 bytes including
# the terminator).
	.type	A,@object               # @A
	.comm	A,9437184,16
	.type	B,@object               # @B
	.comm	B,9437184,16
	.type	.L.str,@object          # @.str
	.section	.rodata.str1.1,"aMS",@progbits,1
.L.str:
	.asciz	"%lf "
	.size	.L.str, 5

	.type	C,@object               # @C
	.comm	C,9437184,16

	.ident	"clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
	.section	".note.GNU-stack","",@progbits
