# -----------------------------------------------------------------------------
# 1536 x 1536 single-precision matrix multiply:  C += A * B
#
# Syntax : AT&T (GNU as), x86-64, ELF.
# Origin : compiler output for "matmul.c" (see .file / .ident below).
# Calls  : arguments travel in %rdi/%rsi/%rdx and %xmm0, and %al carries the
#          vector-register count for the variadic fprintf call, i.e. the
#          System V AMD64 convention.
# Data   : A, B and C are 9437184-byte, 16-byte-aligned common symbols.  All
#          code below addresses them as 1536 rows of 6144 bytes holding 4-byte
#          floats (1536 * 1536 * 4 = 9437184).
# -----------------------------------------------------------------------------
        .text
        .file   "matmul.c"

        .section        .rodata.cst8,"aM",@progbits,8
        .p2align        3
.LCPI0_0:
        .quad   4602678819172646912     # double 0.5 (0x3FE0000000000000)

# -----------------------------------------------------------------------------
# init_array
#   Fills A and B with identical values.  For row i and column j:
#       A[i][j] = B[i][j] = (float)(0.5 * (((i * j) mod 1024) + 1))
#   Two columns (even j, odd j+1) are written per inner iteration.
#
# In   : no argument registers are read (presumably `void init_array(void)`
#        in the C source -- the prototype is not visible here).
# Out  : nothing meaningful is left in %rax.
# Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, %xmm0, %xmm1, flags.
#
# Register roles:
#   %rax = &B[i][0]        %rcx = &A[i][0]
#   %r9  = i               %r8d = 2 * i  (step added to %edx per column pair)
#   %rdi = j + 1           (odd column index; starts at 1, steps by 2)
#   %edx = i * j           (for the even column j = %rdi - 1)
#   %xmm0 = 0.5
# -----------------------------------------------------------------------------
        .text
        .globl  init_array
        .p2align        4, 0x90
        .type   init_array,@function
init_array:
        .cfi_startproc
# %bb.0:                                # entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        leaq    B(%rip), %rax           # row pointer into B
        leaq    A(%rip), %rcx           # row pointer into A
        xorl    %r8d, %r8d              # 2 * i = 0
        movsd   .LCPI0_0(%rip), %xmm0   # xmm0 = 0.5
        xorl    %r9d, %r9d              # i = 0
        .p2align        4, 0x90
.LBB0_1:                                # row loop: i = 0 .. 1535
        movl    $1, %edi                # odd column index = 1
        xorl    %edx, %edx              # i * j = 0
        .p2align        4, 0x90
.LBB0_2:                                # column-pair loop
        # Even column j = %rdi - 1.  %edx is even here, so
        # (%edx & 1022) | 1 equals ((i * j) mod 1024) + 1.
        movl    %edx, %esi
        andl    $1022, %esi             # imm = 0x3FE
        orl     $1, %esi
        xorps   %xmm1, %xmm1            # break the dependency on old xmm1
        cvtsi2sdl %esi, %xmm1
        mulsd   %xmm0, %xmm1            # * 0.5 in double precision
        cvtsd2ss %xmm1, %xmm1           # narrow to float
        movss   %xmm1, -4(%rcx,%rdi,4)  # A[i][j]
        movss   %xmm1, -4(%rax,%rdi,4)  # B[i][j]
        # Odd column j + 1:  i + i*j = i * (j + 1).
        leal    (%r9,%rdx), %esi
        andl    $1023, %esi             # imm = 0x3FF
        addl    $1, %esi
        xorps   %xmm1, %xmm1
        cvtsi2sdl %esi, %xmm1
        mulsd   %xmm0, %xmm1
        cvtsd2ss %xmm1, %xmm1
        movss   %xmm1, (%rcx,%rdi,4)    # A[i][j+1]
        movss   %xmm1, (%rax,%rdi,4)    # B[i][j+1]
        addq    $2, %rdi                # next column pair
        addl    %r8d, %edx              # i*j += 2*i
        cmpq    $1537, %rdi             # imm = 0x601; stop once j + 1 == 1537
        jne     .LBB0_2
# %bb.3:                                # end of row
        addq    $1, %r9                 # ++i
        addq    $6144, %rax             # imm = 0x1800; next row of B
        addq    $6144, %rcx             # imm = 0x1800; next row of A
        addl    $2, %r8d                # keep %r8d == 2 * i
        cmpq    $1536, %r9              # imm = 0x600
        jne     .LBB0_1
# %bb.4:                                # exit
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end0:
        .size   init_array, .Lfunc_end0-init_array
        .cfi_endproc

# -----------------------------------------------------------------------------
# print_array
#   Writes every element of C to stdout.  For each row i and column j:
#       fprintf(stdout, "%lf ", (double)C[i][j]);
#       if (j % 80 == 79) fputc('\n', stdout);
#   and one more fputc('\n', stdout) after each row.
#
# In   : no argument registers are read (presumably `void print_array(void)`).
# Out  : nothing meaningful is left in %rax.
# Saves: %rbx, %r12-%r15 (pushed/popped); the extra `pushq %rax` reserves the
#        8-byte slot at -48(%rbp) and keeps %rsp 16-byte aligned at the calls.
#
# Register roles (all callee-saved, so they survive the libc calls):
#   %r13 = &C[i][0]        %rbx = j
#   %r12 = 0xCCCCCCCD      (reciprocal used to compute j / 80)
#   %r14 = &"%lf "         %r15d = 80 * (j / 80) + 79
#   -48(%rbp) = i
# stdout is reloaded from the global after every call.
# -----------------------------------------------------------------------------
        .globl  print_array
        .p2align        4, 0x90
        .type   print_array,@function
print_array:
        .cfi_startproc
# %bb.0:                                # entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        pushq   %rax                    # spill slot for i + stack alignment
        .cfi_offset %rbx, -56
        .cfi_offset %r12, -48
        .cfi_offset %r13, -40
        .cfi_offset %r14, -32
        .cfi_offset %r15, -24
        leaq    C(%rip), %r13           # row pointer into C
        xorl    %eax, %eax              # i = 0
        movl    $3435973837, %r12d      # imm = 0xCCCCCCCD
        leaq    .L.str(%rip), %r14      # format string
        .p2align        4, 0x90
.LBB1_1:                                # row loop: i = 0 .. 1535
        movq    %rax, -48(%rbp)         # 8-byte Spill (i)
        movq    stdout(%rip), %rsi
        xorl    %ebx, %ebx              # j = 0
        .p2align        4, 0x90
.LBB1_2:                                # column loop: j = 0 .. 1535
        # %r15d = 80 * (j / 80) + 79; it equals j exactly when j % 80 == 79.
        # (j * 0xCCCCCCCD) >> 38 is the unsigned quotient j / 80.
        movl    %ebx, %eax
        imulq   %r12, %rax
        shrq    $38, %rax
        leal    (%rax,%rax,4), %r15d    # 5 * (j / 80)
        shll    $4, %r15d               # 80 * (j / 80)
        addl    $79, %r15d
        movss   (%r13,%rbx,4), %xmm0    # xmm0 = C[i][j]
        cvtss2sd %xmm0, %xmm0           # varargs take a double
        movb    $1, %al                 # one vector register used by varargs
        movq    %rsi, %rdi              # stream = stdout
        movq    %r14, %rsi              # format = "%lf "
        callq   fprintf@PLT
        cmpl    %ebx, %r15d
        jne     .LBB1_4
# %bb.3:                                # j % 80 == 79: break the line
        movq    stdout(%rip), %rsi
        movl    $10, %edi               # '\n'
        callq   fputc@PLT
.LBB1_4:                                # next column
        addq    $1, %rbx
        movq    stdout(%rip), %rsi
        cmpq    $1536, %rbx             # imm = 0x600
        jne     .LBB1_2
# %bb.5:                                # end of row
        movl    $10, %edi               # '\n'
        callq   fputc@PLT
        movq    -48(%rbp), %rax         # 8-byte Reload (i)
        addq    $1, %rax
        addq    $6144, %r13             # imm = 0x1800; next row of C
        cmpq    $1536, %rax             # imm = 0x600
        jne     .LBB1_1
# %bb.6:                                # exit
        addq    $8, %rsp                # drop the spill slot
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end1:
        .size   print_array, .Lfunc_end1-print_array
        .cfi_endproc

# -----------------------------------------------------------------------------
# main
#   1. init_array()
#   2. memset(C, 0, 9437184)
#   3. C += A * B with every loop tiled by 64:
#        for ii in 0,64,..,1472          (.LBB2_1, row tile of C / A)
#          for jj in 0,64,..,1472        (.LBB2_2, column tile of C / B)
#            for kk in 0,64,..,1472      (.LBB2_3, reduction tile)
#              for i in ii .. ii+63      (.LBB2_4)
#                load C[i][jj .. jj+63] into 16 four-float accumulators
#                for k in 0 .. 63        (.LBB2_5)
#                  acc[n] += broadcast(A[i][kk+k]) * B[kk+k][jj+4n .. jj+4n+3]
#                store the accumulators back to C[i][jj .. jj+63]
#   4. return 0
#   print_array is not called from main in this file.
#
# Saves: %rbx, %r12-%r15; 344 bytes of locals keep %rsp 16-byte aligned.
#
# Stack slots (relative to %rbp):
#    -48  ii                              -56  &A[ii][0]
#    -64  ii + 64 (exclusive bound of i)  -72  &B[kk][jj] + 240
#    -80  jj + 4
#   -264 .. -160 (step 8)  jj + 8, jj + 12, .., jj + 60
#    -96, -112, -128  16-byte accumulator slots for columns jj+52, jj+56, jj+60
#   -136  &B[0][jj] + 240                -144  kk
#   -152  &A[ii][kk]                     -272  jj
#   -368 .. -280 (step 8)  &C[i][jj + 4n] for n = 0 .. 11
#   -376  i
#
# movaps is used throughout: A/B/C are 16-byte aligned, rows are 6144 bytes and
# every vector column offset is a multiple of 16 bytes; %rbp is 16-byte aligned
# so the -96/-112/-128 slots are too.
# -----------------------------------------------------------------------------
        .globl  main
        .p2align        4, 0x90
        .type   main,@function
main:
        .cfi_startproc
# %bb.0:                                # entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        subq    $344, %rsp              # imm = 0x158
        .cfi_offset %rbx, -56
        .cfi_offset %r12, -48
        .cfi_offset %r13, -40
        .cfi_offset %r14, -32
        .cfi_offset %r15, -24
        callq   init_array
        leaq    C(%rip), %rdi           # memset(C, 0, sizeof C)
        xorl    %eax, %eax
        movq    %rax, -48(%rbp)         # 8-byte Spill (ii = 0)
        xorl    %esi, %esi
        movl    $9437184, %edx          # imm = 0x900000
        callq   memset@PLT
        movl    $64, %eax
        movq    %rax, -64(%rbp)         # 8-byte Spill (ii + 64)
        leaq    A(%rip), %rax
        movq    %rax, -56(%rbp)         # 8-byte Spill (&A[ii][0])
        .p2align        4, 0x90
.LBB2_1:                                # ii loop (row tiles)
        leaq    B+240(%rip), %rax       # &B[0][0] + 240
        xorl    %edi, %edi              # jj = 0
        .p2align        4, 0x90
.LBB2_2:                                # jj loop (column tiles)
        # %rdi (jj) is always a multiple of 64 here, so OR-ing in 4..60
        # yields jj + 4 .. jj + 60: the first column of each 4-float vector.
        movq    %rdi, %rcx
        orq     $4, %rcx
        movq    %rcx, -80(%rbp)         # 8-byte Spill (jj + 4)
        movq    %rdi, %rcx
        orq     $8, %rcx
        movq    %rcx, -264(%rbp)        # 8-byte Spill (jj + 8)
        movq    %rdi, %rcx
        orq     $12, %rcx
        movq    %rcx, -256(%rbp)        # 8-byte Spill (jj + 12)
        movq    %rdi, %rcx
        orq     $16, %rcx
        movq    %rcx, -248(%rbp)        # 8-byte Spill (jj + 16)
        movq    %rdi, %rcx
        orq     $20, %rcx
        movq    %rcx, -240(%rbp)        # 8-byte Spill (jj + 20)
        movq    %rdi, %rcx
        orq     $24, %rcx
        movq    %rcx, -232(%rbp)        # 8-byte Spill (jj + 24)
        movq    %rdi, %rcx
        orq     $28, %rcx
        movq    %rcx, -224(%rbp)        # 8-byte Spill (jj + 28)
        movq    %rdi, %rcx
        orq     $32, %rcx
        movq    %rcx, -216(%rbp)        # 8-byte Spill (jj + 32)
        movq    %rdi, %rcx
        orq     $36, %rcx
        movq    %rcx, -208(%rbp)        # 8-byte Spill (jj + 36)
        movq    %rdi, %rcx
        orq     $40, %rcx
        movq    %rcx, -200(%rbp)        # 8-byte Spill (jj + 40)
        movq    %rdi, %rcx
        orq     $44, %rcx
        movq    %rcx, -192(%rbp)        # 8-byte Spill (jj + 44)
        movq    %rdi, %rcx
        orq     $48, %rcx
        movq    %rcx, -184(%rbp)        # 8-byte Spill (jj + 48)
        movq    %rdi, %rcx
        orq     $52, %rcx
        movq    %rcx, -176(%rbp)        # 8-byte Spill (jj + 52)
        movq    %rdi, %rcx
        orq     $56, %rcx
        movq    %rcx, -168(%rbp)        # 8-byte Spill (jj + 56)
        movq    %rdi, %rcx
        orq     $60, %rcx
        movq    %rcx, -160(%rbp)        # 8-byte Spill (jj + 60)
        movq    -56(%rbp), %rdx         # 8-byte Reload (&A[ii][0], kk = 0)
        movq    %rax, -136(%rbp)        # 8-byte Spill (&B[0][jj] + 240)
        movq    %rax, -72(%rbp)         # 8-byte Spill (&B[kk][jj] + 240)
        xorl    %eax, %eax              # kk = 0
        movq    %rdi, -272(%rbp)        # 8-byte Spill (jj)
        .p2align        4, 0x90
.LBB2_3:                                # kk loop (reduction tiles)
        movq    %rax, -144(%rbp)        # 8-byte Spill (kk)
        movq    %rdx, -152(%rbp)        # 8-byte Spill (&A[ii][kk])
        movq    -48(%rbp), %rax         # 8-byte Reload (i = ii)
        .p2align        4, 0x90
.LBB2_4:                                # i loop (rows inside the tile)
        movq    %rax, -376(%rbp)        # 8-byte Spill (i)
        leaq    (%rax,%rax,2), %rax     # 3 * i
        shlq    $11, %rax               # 3 * i * 2048 = i * 6144
        leaq    C(%rip), %rsi
        addq    %rsi, %rax              # %rax = &C[i][0]
        # Remember where each of the first 12 result vectors goes back to.
        leaq    (%rax,%rdi,4), %rcx
        movq    %rcx, -368(%rbp)        # 8-byte Spill (&C[i][jj])
        movq    -80(%rbp), %rcx         # 8-byte Reload
        leaq    (%rax,%rcx,4), %rcx
        movq    %rcx, -360(%rbp)        # 8-byte Spill (&C[i][jj+4])
        movq    -264(%rbp), %rbx        # 8-byte Reload
        leaq    (%rax,%rbx,4), %rcx
        movq    %rcx, -352(%rbp)        # 8-byte Spill (&C[i][jj+8])
        movq    -256(%rbp), %r8         # 8-byte Reload
        movq    %rdi, %rsi              # %rsi = jj (%rdi is reused below)
        leaq    (%rax,%r8,4), %rdi
        movq    %rdi, -344(%rbp)        # 8-byte Spill (&C[i][jj+12])
        movq    -248(%rbp), %rdi        # 8-byte Reload
        leaq    (%rax,%rdi,4), %rcx
        movq    %rcx, -336(%rbp)        # 8-byte Spill (&C[i][jj+16])
        movq    -240(%rbp), %r9         # 8-byte Reload
        leaq    (%rax,%r9,4), %rcx
        movq    %rcx, -328(%rbp)        # 8-byte Spill (&C[i][jj+20])
        movq    -232(%rbp), %r10        # 8-byte Reload
        leaq    (%rax,%r10,4), %rcx
        movq    %rcx, -320(%rbp)        # 8-byte Spill (&C[i][jj+24])
        movq    -224(%rbp), %r14        # 8-byte Reload
        leaq    (%rax,%r14,4), %rcx
        movq    %rcx, -312(%rbp)        # 8-byte Spill (&C[i][jj+28])
        movq    -216(%rbp), %r15        # 8-byte Reload
        leaq    (%rax,%r15,4), %rcx
        movq    %rcx, -304(%rbp)        # 8-byte Spill (&C[i][jj+32])
        movq    -208(%rbp), %r12        # 8-byte Reload
        leaq    (%rax,%r12,4), %rcx
        movq    %rcx, -296(%rbp)        # 8-byte Spill (&C[i][jj+36])
        movq    -200(%rbp), %r13        # 8-byte Reload
        leaq    (%rax,%r13,4), %rcx
        movq    %rcx, -288(%rbp)        # 8-byte Spill (&C[i][jj+40])
        movq    -192(%rbp), %r11        # 8-byte Reload
        leaq    (%rax,%r11,4), %rcx
        movq    %rcx, -280(%rbp)        # 8-byte Spill (&C[i][jj+44])
        # Load the 64 current C values: 13 accumulators live in
        # xmm15 .. xmm3, the last three are kept in stack slots.
        movaps  (%rax,%rsi,4), %xmm15   # C[i][jj    ..]
        movq    -80(%rbp), %rcx         # 8-byte Reload
        movaps  (%rax,%rcx,4), %xmm14   # C[i][jj+4  ..]
        movaps  (%rax,%rbx,4), %xmm13   # C[i][jj+8  ..]
        movaps  (%rax,%r8,4), %xmm12    # C[i][jj+12 ..]
        movaps  (%rax,%rdi,4), %xmm11   # C[i][jj+16 ..]
        movaps  (%rax,%r9,4), %xmm10    # C[i][jj+20 ..]
        movaps  (%rax,%r10,4), %xmm9    # C[i][jj+24 ..]
        movaps  (%rax,%r14,4), %xmm8    # C[i][jj+28 ..]
        movaps  (%rax,%r15,4), %xmm7    # C[i][jj+32 ..]
        movaps  (%rax,%r12,4), %xmm6    # C[i][jj+36 ..]
        movaps  (%rax,%r13,4), %xmm5    # C[i][jj+40 ..]
        movaps  (%rax,%r11,4), %xmm4    # C[i][jj+44 ..]
        movq    -184(%rbp), %rcx        # 8-byte Reload
        movaps  (%rax,%rcx,4), %xmm3    # C[i][jj+48 ..]
        movq    -176(%rbp), %rsi        # 8-byte Reload
        movaps  (%rax,%rsi,4), %xmm0
        movaps  %xmm0, -96(%rbp)        # 16-byte Spill (C[i][jj+52 ..])
        movq    -168(%rbp), %rbx        # 8-byte Reload
        movaps  (%rax,%rbx,4), %xmm0
        movaps  %xmm0, -112(%rbp)       # 16-byte Spill (C[i][jj+56 ..])
        movq    -160(%rbp), %rdi        # 8-byte Reload
        movaps  (%rax,%rdi,4), %xmm0
        movaps  %xmm0, -128(%rbp)       # 16-byte Spill (C[i][jj+60 ..])
        # Store addresses for the last four vectors stay in registers.
        leaq    (%rax,%rcx,4), %r8      # &C[i][jj+48]
        leaq    (%rax,%rsi,4), %rcx     # &C[i][jj+52]
        leaq    (%rax,%rbx,4), %rsi     # &C[i][jj+56]
        leaq    (%rax,%rdi,4), %rax     # &C[i][jj+60]
        movq    -72(%rbp), %r9          # 8-byte Reload (&B[kk][jj] + 240)
        xorl    %r10d, %r10d            # k = 0 (flags are dead here)
        .p2align        4, 0x90
.LBB2_5:                                # k loop: 64 rank-1 updates
        # %rdx = &A[i][kk], %r10 = k, %r9 = &B[kk+k][jj] + 240, so the 16
        # vectors of the B row sit at -240(%r9) .. 0(%r9).
        movss   (%rdx,%r10,4), %xmm0    # xmm0 = A[i][kk+k]
        shufps  $0, %xmm0, %xmm0        # broadcast to all four lanes
        movaps  -240(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm15
        movaps  -224(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm14
        movaps  -208(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm13
        movaps  -192(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm12
        movaps  -176(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm11
        movaps  -160(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm10
        movaps  -144(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm9
        movaps  -128(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm8
        movaps  -112(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm7
        movaps  -96(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm6
        movaps  -80(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm5
        movaps  -64(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm4
        movaps  -48(%r9), %xmm1
        mulps   %xmm0, %xmm1
        addps   %xmm1, %xmm3
        # The remaining three accumulators are updated through their
        # stack slots.
        movaps  -32(%r9), %xmm1
        mulps   %xmm0, %xmm1
        movaps  -96(%rbp), %xmm2        # 16-byte Reload
        addps   %xmm1, %xmm2
        movaps  %xmm2, -96(%rbp)        # 16-byte Spill
        movaps  -16(%r9), %xmm1
        mulps   %xmm0, %xmm1
        movaps  -112(%rbp), %xmm2       # 16-byte Reload
        addps   %xmm1, %xmm2
        movaps  %xmm2, -112(%rbp)       # 16-byte Spill
        mulps   (%r9), %xmm0            # last vector: the broadcast is consumed
        movaps  -128(%rbp), %xmm1       # 16-byte Reload
        addps   %xmm0, %xmm1
        movaps  %xmm1, -128(%rbp)       # 16-byte Spill
        addq    $1, %r10                # ++k
        addq    $6144, %r9              # imm = 0x1800; next row of B
        cmpq    $64, %r10
        jne     .LBB2_5
# %bb.6:                                # write the 16 accumulators back to C
        movq    -368(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm15, (%rdi)
        movq    -360(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm14, (%rdi)
        movq    -352(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm13, (%rdi)
        movq    -344(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm12, (%rdi)
        movq    -336(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm11, (%rdi)
        movq    -328(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm10, (%rdi)
        movq    -320(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm9, (%rdi)
        movq    -312(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm8, (%rdi)
        movq    -304(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm7, (%rdi)
        movq    -296(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm6, (%rdi)
        movq    -288(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm5, (%rdi)
        movq    -280(%rbp), %rdi        # 8-byte Reload
        movaps  %xmm4, (%rdi)
        movaps  %xmm3, (%r8)
        movaps  -96(%rbp), %xmm0        # 16-byte Reload
        movaps  %xmm0, (%rcx)
        movaps  -112(%rbp), %xmm0       # 16-byte Reload
        movaps  %xmm0, (%rsi)
        movaps  -128(%rbp), %xmm0       # 16-byte Reload
        movaps  %xmm0, (%rax)
        movq    -376(%rbp), %rax        # 8-byte Reload (i)
        addq    $1, %rax                # ++i
        addq    $6144, %rdx             # imm = 0x1800; next row of A
        cmpq    -64(%rbp), %rax         # 8-byte Folded Reload (i vs ii + 64)
        movq    -272(%rbp), %rdi        # 8-byte Reload (jj); mov keeps flags
        jne     .LBB2_4
# %bb.7:                                # next kk tile
        movq    -144(%rbp), %rax        # 8-byte Reload (kk)
        addq    $64, %rax
        addq    $393216, -72(%rbp)      # 8-byte Folded Spill
                                        # imm = 0x60000; 64 rows of B
        movq    -152(%rbp), %rdx        # 8-byte Reload (&A[ii][kk])
        addq    $256, %rdx              # imm = 0x100; 64 floats along the row
        cmpq    $1536, %rax             # imm = 0x600
        jb      .LBB2_3
# %bb.8:                                # next jj tile
        addq    $64, %rdi
        movq    -136(%rbp), %rax        # 8-byte Reload (&B[0][jj] + 240)
        addq    $256, %rax              # imm = 0x100; 64 floats along the row
        cmpq    $1536, %rdi             # imm = 0x600
        jb      .LBB2_2
# %bb.9:                                # next ii tile
        movq    -48(%rbp), %rax         # 8-byte Reload (ii)
        movq    %rax, %rcx
        addq    $64, %rcx
        addq    $64, -64(%rbp)          # 8-byte Folded Spill (ii + 64)
        addq    $393216, -56(%rbp)      # 8-byte Folded Spill
                                        # imm = 0x60000; 64 rows of A
        movq    %rcx, %rax
        movq    %rcx, -48(%rbp)         # 8-byte Spill (ii)
        cmpq    $1536, %rcx             # imm = 0x600
        jb      .LBB2_1
# %bb.10:                               # exit
        xorl    %eax, %eax              # return 0
        addq    $344, %rsp              # imm = 0x158
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end2:
        .size   main, .Lfunc_end2-main
        .cfi_endproc

# -----------------------------------------------------------------------------
# Data: three 9437184-byte matrices (16-byte aligned common symbols) and the
# fprintf format string used by print_array.
# -----------------------------------------------------------------------------
        .type   A,@object               # @A
        .comm   A,9437184,16
        .type   B,@object               # @B
        .comm   B,9437184,16
        .type   .L.str,@object          # @.str
        .section        .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .asciz  "%lf "
        .size   .L.str, 5

        .type   C,@object               # @C
        .comm   C,9437184,16

        .ident  "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
        .section        ".note.GNU-stack","",@progbits