#-----------------------------------------------------------------------
# Assembly for the translation unit "matmul.c" (see .file / .ident).
#
# Syntax: AT&T, GNU as / clang integrated assembler.
# Target: x86-64 ELF.
# ABI:    register usage follows the System V AMD64 convention
#         (integer arguments in %rdi/%rsi, %al = number of vector
#         registers used when calling the variadic fprintf).
#
# Globals: A, B, C -- three common symbols of MATRIX_BYTES bytes each,
#          accessed below as N x N arrays of 4-byte floats with a row
#          pitch of ROW_BYTES.
#-----------------------------------------------------------------------
        .text
        .file   "matmul.c"

        .set    N, 1536                         # rows == columns
        .set    ROW_BYTES, N * 4                # 6144: one row of floats
        .set    TWO_ROWS, 2 * ROW_BYTES         # 12288
        .set    THREE_ROWS, 3 * ROW_BYTES       # 18432
        .set    MATRIX_BYTES, N * ROW_BYTES     # 9437184
        .set    INIT_IDX_END, N + 1             # 1537: init loop index starts at 1, step 2
        .set    DOT_IDX_END, N + 2              # 1538: dot loop index starts at 2, step 3

        .section        .rodata.cst8,"aM",@progbits,8
        .p2align        3
.Lhalf:
        .quad   4602678819172646912             # double 0.5

#-----------------------------------------------------------------------
# void init_array(void)
#
# For every i, j in [0, N):
#     A[i][j] = B[i][j] = (float)((((i * j) & 1023) + 1) * 0.5)
#
# The column loop is unrolled by two (columns j and j+1 per iteration).
#
# Registers:
#     %rax  = &B[i][0]          %rcx  = &A[i][0]
#     %r9   = i                 %r8d  = 2 * i
#     %rdi  = j + 1 (j even)    %edx  = i * j (j even), grows by 2*i
#     %xmm0 = 0.5               %xmm1, %esi = scratch
# Clobbers: rax, rcx, rdx, rsi, rdi, r8, r9, xmm0, xmm1, flags.
# Leaf function; makes no calls.
#-----------------------------------------------------------------------
        .text
        .globl  init_array
        .p2align        4, 0x90
        .type   init_array,@function
init_array:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        leaq    B(%rip), %rax
        leaq    A(%rip), %rcx
        xorl    %r8d, %r8d                      # 2*i = 0
        movsd   .Lhalf(%rip), %xmm0             # xmm0 = 0.5
        xorl    %r9d, %r9d                      # i = 0
        .p2align        4, 0x90
.Linit_row:
        movl    $1, %edi                        # j + 1, with j = 0
        xorl    %edx, %edx                      # i * j = 0
        .p2align        4, 0x90
.Linit_col_pair:
        # Even column j: i*j is even, so (x & 1022) | 1 == (x & 1023) + 1.
        movl    %edx, %esi
        andl    $1022, %esi                     # imm = 0x3FE
        orl     $1, %esi
        xorps   %xmm1, %xmm1                    # break dependency on old xmm1
        cvtsi2sdl       %esi, %xmm1
        mulsd   %xmm0, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, -4(%rcx,%rdi,4)          # A[i][j]
        movss   %xmm1, -4(%rax,%rdi,4)          # B[i][j]

        # Odd column j+1: i*(j+1) = i*j + i.
        leal    (%r9,%rdx), %esi
        andl    $1023, %esi                     # imm = 0x3FF
        addl    $1, %esi
        xorps   %xmm1, %xmm1
        cvtsi2sdl       %esi, %xmm1
        mulsd   %xmm0, %xmm1
        cvtsd2ss        %xmm1, %xmm1
        movss   %xmm1, (%rcx,%rdi,4)            # A[i][j+1]
        movss   %xmm1, (%rax,%rdi,4)            # B[i][j+1]

        addq    $2, %rdi                        # j += 2
        addl    %r8d, %edx                      # i*j += 2*i
        cmpq    $INIT_IDX_END, %rdi
        jne     .Linit_col_pair

        addq    $1, %r9                         # ++i
        addq    $ROW_BYTES, %rax
        addq    $ROW_BYTES, %rcx
        addl    $2, %r8d
        cmpq    $N, %r9
        jne     .Linit_row

        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end0:
        .size   init_array, .Lfunc_end0-init_array
        .cfi_endproc

#-----------------------------------------------------------------------
# void print_array(void)
#
# Writes every element of C to stdout with the format "%lf ", emits a
# newline after each column j with j % 80 == 79, and another newline at
# the end of each row.
#
# Registers (callee-saved, live across the calls):
#     %r13  = &C[i][0]          %rbx  = j
#     %r12  = 0xCCCCCCCD        (reciprocal constant: (j * r12) >> 38 == j / 80)
#     %r14  = address of the format string
#     %r15d = (j / 80) * 80 + 79
# Stack:
#     -48(%rbp) holds the row counter i across calls. The slot comes from
#     the extra "pushq %rax", which also keeps %rsp 16-byte aligned at
#     every call (7 pushes + return address = 64 bytes).
#-----------------------------------------------------------------------
        .globl  print_array
        .p2align        4, 0x90
        .type   print_array,@function
print_array:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        pushq   %rax                            # spill slot + alignment
        .cfi_offset %rbx, -56
        .cfi_offset %r12, -48
        .cfi_offset %r13, -40
        .cfi_offset %r14, -32
        .cfi_offset %r15, -24
        leaq    C(%rip), %r13
        xorl    %eax, %eax                      # i = 0
        movl    $3435973837, %r12d              # imm = 0xCCCCCCCD
        leaq    .L.str(%rip), %r14
        .p2align        4, 0x90
.Lprint_row:
        movq    %rax, -48(%rbp)                 # save i across the calls
        movq    stdout(%rip), %rsi
        xorl    %ebx, %ebx                      # j = 0
        .p2align        4, 0x90
.Lprint_elem:
        # r15d = (j / 80) * 80 + 79; equals j exactly when j % 80 == 79.
        movl    %ebx, %eax
        imulq   %r12, %rax
        shrq    $38, %rax                       # j / 80
        leal    (%rax,%rax,4), %r15d            # * 5
        shll    $4, %r15d                       # * 16
        addl    $79, %r15d

        movss   (%r13,%rbx,4), %xmm0            # C[i][j]
        cvtss2sd        %xmm0, %xmm0            # pass as double to fprintf
        movb    $1, %al                         # one vector register used
        movq    %rsi, %rdi                      # stream = stdout
        movq    %r14, %rsi                      # format = "%lf "
        callq   fprintf@PLT
        cmpl    %ebx, %r15d
        jne     .Lprint_next

        movq    stdout(%rip), %rsi
        movl    $10, %edi                       # '\n'
        callq   fputc@PLT
.Lprint_next:
        addq    $1, %rbx
        movq    stdout(%rip), %rsi              # reloaded: %rsi is volatile across calls
        cmpq    $N, %rbx
        jne     .Lprint_elem

        movl    $10, %edi                       # '\n' at end of row
        callq   fputc@PLT
        movq    -48(%rbp), %rax                 # restore i
        addq    $1, %rax
        addq    $ROW_BYTES, %r13
        cmpq    $N, %rax
        jne     .Lprint_row

        addq    $8, %rsp                        # drop the spill slot
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end1:
        .size   print_array, .Lfunc_end1-print_array
        .cfi_endproc

#-----------------------------------------------------------------------
# int main(void)
#
# Calls init_array, then for every i, j in [0, N) stores
#     C[i][j] = sum over k in [0, N) of A[i][k] * B[k][j]
# accumulated in single precision in ascending k order, and returns 0.
# print_array is not called from here.
#
# The k loop is unrolled by three. The running sum stays in %xmm0 and
# C[i][j] is written once, after the k loop.
#
# Registers:
#     %rax  = &A[i][0]          %r9   = &C[i][0]
#     %r8   = &B[0][0]          %r10  = i
#     %rsi  = &B[0][j]          %rdx  = j
#     %rdi  = &B[k][j]          %rcx  = k + 2
#     %xmm0 = running sum       %xmm1, %xmm2 = scratch
#-----------------------------------------------------------------------
        .globl  main
        .p2align        4, 0x90
        .type   main,@function
main:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        callq   init_array
        leaq    A(%rip), %rax
        xorl    %r10d, %r10d                    # i = 0
        leaq    B(%rip), %r8
        leaq    C(%rip), %r9
        .p2align        4, 0x90
.Lmm_row:
        movq    %r8, %rsi                       # &B[0][0]
        xorl    %edx, %edx                      # j = 0
        .p2align        4, 0x90
.Lmm_col:
        xorps   %xmm0, %xmm0                    # sum = 0
        movl    $2, %ecx                        # k + 2, with k = 0
        movq    %rsi, %rdi                      # &B[0][j]
        .p2align        4, 0x90
.Lmm_dot:
        movss   -8(%rax,%rcx,4), %xmm1          # A[i][k]
        mulss   (%rdi), %xmm1                   #   * B[k][j]
        movss   -4(%rax,%rcx,4), %xmm2          # A[i][k+1]
        addss   %xmm0, %xmm1
        mulss   ROW_BYTES(%rdi), %xmm2          #   * B[k+1][j]
        addss   %xmm1, %xmm2
        movss   (%rax,%rcx,4), %xmm0            # A[i][k+2]
        mulss   TWO_ROWS(%rdi), %xmm0           #   * B[k+2][j]
        addss   %xmm2, %xmm0
        addq    $3, %rcx                        # k += 3
        addq    $THREE_ROWS, %rdi
        cmpq    $DOT_IDX_END, %rcx
        jne     .Lmm_dot

        movss   %xmm0, (%r9,%rdx,4)             # C[i][j] = sum
        addq    $1, %rdx                        # ++j
        addq    $4, %rsi                        # next column of B
        cmpq    $N, %rdx
        jne     .Lmm_col

        addq    $1, %r10                        # ++i
        addq    $ROW_BYTES, %rax                # next row of A
        addq    $ROW_BYTES, %r9                 # next row of C
        cmpq    $N, %r10
        jne     .Lmm_row

        xorl    %eax, %eax                      # return 0
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end2:
        .size   main, .Lfunc_end2-main
        .cfi_endproc

#-----------------------------------------------------------------------
# Data
#-----------------------------------------------------------------------
        .type   A,@object
        .comm   A,MATRIX_BYTES,16
        .type   B,@object
        .comm   B,MATRIX_BYTES,16

        .type   .L.str,@object
        .section        .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .asciz  "%lf "
        .size   .L.str, 5

        .type   C,@object
        .comm   C,MATRIX_BYTES,16

        .ident  "clang version 8.0.0 (trunk 342834) (llvm/trunk 342856)"
        .section        ".note.GNU-stack","",@progbits