; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512f | FileCheck %s

; eggs: loop kernel that keeps six <8 x double> accumulators live across
; iterations.  Every iteration loads three vectors from %arg14 (at element
; offsets %arg11, %arg10 + %arg6 and %arg10 + %arg7; none of these depend on
; the induction variable), splats two scalars read from %arg13 (at element
; offsets %tmp21 + %arg8 and %tmp21 + %arg9), and feeds each vector/splat pair
; plus one accumulator to @llvm.fmuladd.v8f64.  The exit test sits at the
; bottom of the loop and compares the incremented counter with %arg12; on exit
; the six results are stored through %arg, %arg1, ..., %arg5.
;
; The autogenerated assertions below pin the loop body to three vmovupd loads,
; two vbroadcastsd and six vfmadd231pd accumulating in zmm0-zmm5, with no
; vector register-to-register moves in between.
; NOTE(review): presumably this guards against copies appearing when FMA
; operands are commuted -- confirm against the commit that added the test.
define void @eggs(<8 x double>* %arg, <8 x double>* %arg1, <8 x double>* %arg2, <8 x double>* %arg3, <8 x double>* %arg4, <8 x double>* %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10, i64 %arg11, i64 %arg12, double* %arg13, double* %arg14) nounwind {
; CHECK-LABEL: eggs:
; CHECK:       ## %bb.0: ## %bb
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r10
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r11
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r12
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r15
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r14
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
; CHECK-NEXT:    leaq (%rbx,%r14,8), %r14
; CHECK-NEXT:    leaq (%rbx,%r15,8), %r15
; CHECK-NEXT:    vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    xorl %ebx, %ebx
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r13
; CHECK-NEXT:    addq %r12, %r13
; CHECK-NEXT:    addq {{[0-9]+}}(%rsp), %r12
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT:    vxorpd %xmm5, %xmm5, %xmm5
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  LBB0_1: ## %bb15
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vmovupd (%rax,%r11,8), %zmm6
; CHECK-NEXT:    vmovupd (%rax,%r13,8), %zmm7
; CHECK-NEXT:    vmovupd (%rax,%r12,8), %zmm8
; CHECK-NEXT:    vbroadcastsd (%r15,%rbx,8), %zmm9
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm0 = (zmm6 * zmm9) + zmm0
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm1 = (zmm7 * zmm9) + zmm1
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm2 = (zmm8 * zmm9) + zmm2
; CHECK-NEXT:    vbroadcastsd (%r14,%rbx,8), %zmm9
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm3 = (zmm9 * zmm6) + zmm3
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm4 = (zmm9 * zmm7) + zmm4
; CHECK-NEXT:    vfmadd231pd {{.*#+}} zmm5 = (zmm8 * zmm9) + zmm5
; CHECK-NEXT:    incq %rbx
; CHECK-NEXT:    cmpq %rbx, %r10
; CHECK-NEXT:    jne LBB0_1
; CHECK-NEXT:  ## %bb.2: ## %bb51
; CHECK-NEXT:    vmovapd %zmm0, (%rdi)
; CHECK-NEXT:    vmovapd %zmm1, (%rsi)
; CHECK-NEXT:    vmovapd %zmm2, (%rdx)
; CHECK-NEXT:    vmovapd %zmm3, (%rcx)
; CHECK-NEXT:    vmovapd %zmm4, (%r8)
; CHECK-NEXT:    vmovapd %zmm5, (%r9)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
bb:
  br label %bb15

bb15:                                             ; preds = %bb15, %bb
  ; Six zero-initialised accumulators: %tmp, %tmp16 and %tmp17 are combined
  ; with the first splat (%tmp37); %tmp18, %tmp19 and %tmp20 with the second
  ; splat (%tmp45).
  %tmp = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp38, %bb15 ]
  %tmp16 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp39, %bb15 ]
  %tmp17 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp40, %bb15 ]
  %tmp18 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp46, %bb15 ]
  %tmp19 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp47, %bb15 ]
  %tmp20 = phi <8 x double> [ zeroinitializer, %bb ], [ %tmp48, %bb15 ]
  ; Induction variable, counts up from 0 in steps of 1.
  %tmp21 = phi i64 [ 0, %bb ], [ %tmp49, %bb15 ]
  ; Three vector operands read from %arg14.  The loads only promise 8-byte
  ; alignment, and the assertions above expect unaligned vmovupd for them.
  %tmp22 = getelementptr inbounds double, double* %arg14, i64 %arg11
  %tmp23 = bitcast double* %tmp22 to <8 x double>*
  %tmp24 = load <8 x double>, <8 x double>* %tmp23, align 8
  %tmp25 = add i64 %arg10, %arg6
  %tmp26 = getelementptr inbounds double, double* %arg14, i64 %tmp25
  %tmp27 = bitcast double* %tmp26 to <8 x double>*
  %tmp28 = load <8 x double>, <8 x double>* %tmp27, align 8
  %tmp29 = add i64 %arg10, %arg7
  %tmp30 = getelementptr inbounds double, double* %arg14, i64 %tmp29
  %tmp31 = bitcast double* %tmp30 to <8 x double>*
  %tmp32 = load <8 x double>, <8 x double>* %tmp31, align 8
  ; First scalar from %arg13, splatted to all eight lanes (insertelement into
  ; lane 0 followed by an all-zero shuffle mask).
  %tmp33 = add i64 %tmp21, %arg8
  %tmp34 = getelementptr inbounds double, double* %arg13, i64 %tmp33
  %tmp35 = load double, double* %tmp34, align 8
  %tmp36 = insertelement <8 x double> undef, double %tmp35, i32 0
  %tmp37 = shufflevector <8 x double> %tmp36, <8 x double> undef, <8 x i32> zeroinitializer
  %tmp38 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp37, <8 x double> %tmp)
  %tmp39 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp37, <8 x double> %tmp16)
  %tmp40 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp37, <8 x double> %tmp17)
  ; Second scalar from %arg13, splatted the same way and combined with the
  ; same three vector operands.
  %tmp41 = add i64 %tmp21, %arg9
  %tmp42 = getelementptr inbounds double, double* %arg13, i64 %tmp41
  %tmp43 = load double, double* %tmp42, align 8
  %tmp44 = insertelement <8 x double> undef, double %tmp43, i32 0
  %tmp45 = shufflevector <8 x double> %tmp44, <8 x double> undef, <8 x i32> zeroinitializer
  %tmp46 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp24, <8 x double> %tmp45, <8 x double> %tmp18)
  %tmp47 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp28, <8 x double> %tmp45, <8 x double> %tmp19)
  %tmp48 = call <8 x double> @llvm.fmuladd.v8f64(<8 x double> %tmp32, <8 x double> %tmp45, <8 x double> %tmp20)
  ; Bottom-tested exit: leave once the incremented counter equals %arg12.
  %tmp49 = add nuw nsw i64 %tmp21, 1
  %tmp50 = icmp eq i64 %tmp49, %arg12
  br i1 %tmp50, label %bb51, label %bb15

bb51:                                             ; preds = %bb15
  ; The stores carry no explicit alignment; the assertions above expect
  ; aligned vmovapd for them.
  store <8 x double> %tmp38, <8 x double>* %arg
  store <8 x double> %tmp39, <8 x double>* %arg1
  store <8 x double> %tmp40, <8 x double>* %arg2
  store <8 x double> %tmp46, <8 x double>* %arg3
  store <8 x double> %tmp47, <8 x double>* %arg4
  store <8 x double> %tmp48, <8 x double>* %arg5
  ret void
}

; Multiply-add intrinsic called by @eggs on <8 x double> operands; the
; assertions in this file expect each call to become one vfmadd231pd.
declare <8 x double> @llvm.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>)
