; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-P9 %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-postra-bias-addi=false < %s |\
; RUN:   FileCheck -check-prefix=CHECK-P9-NO-HEURISTIC %s
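;
; The two RUN lines appear to compare post-RA scheduling with the default
; addi-biasing heuristic enabled (CHECK-P9) and disabled via
; -ppc-postra-bias-addi=false (CHECK-P9-NO-HEURISTIC); the two check blocks
; contain the same instructions and differ only in scheduling order around
; the addi updates.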

%_type_of_scalars = type <{ [16 x i8], double, [152 x i8] }>
%_elem_type_of_x = type <{ double }>
%_elem_type_of_a = type <{ double }>

@scalars = common local_unnamed_addr global %_type_of_scalars zeroinitializer, align 16

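; @test loads a scalar from @scalars (the double at byte offset 16), splats it,
; and multiplies chunks of %.a by the splat, storing the products into %.x.
; The vector.body below looks like a reduced, hand-unrolled loop: several of
; the computed products are never stored, so the generated loop carries only
; three 32-byte load/multiply/store groups per iteration.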
define void @test([0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_type_of_a]* %.a, i64* noalias %.n) {
; CHECK-P9-LABEL: test:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    ld 5, 0(5)
; CHECK-P9-NEXT:    addis 6, 2, scalars@toc@ha
; CHECK-P9-NEXT:    addi 6, 6, scalars@toc@l
; CHECK-P9-NEXT:    addi 6, 6, 16
; CHECK-P9-NEXT:    rldicr 5, 5, 0, 58
; CHECK-P9-NEXT:    addi 5, 5, -32
; CHECK-P9-NEXT:    lxvdsx 0, 0, 6
; CHECK-P9-NEXT:    rldicl 5, 5, 59, 5
; CHECK-P9-NEXT:    addi 5, 5, 1
; CHECK-P9-NEXT:    mtctr 5
; CHECK-P9-NEXT:    .p2align 4
; CHECK-P9-NEXT:  .LBB0_1: # %vector.body
; CHECK-P9-NEXT:    #
; CHECK-P9-NEXT:    lxv 1, 16(4)
; CHECK-P9-NEXT:    lxv 2, 0(4)
; CHECK-P9-NEXT:    lxv 3, 48(4)
; CHECK-P9-NEXT:    lxv 4, 32(4)
; CHECK-P9-NEXT:    xvmuldp 2, 2, 0
; CHECK-P9-NEXT:    lxv 5, 240(4)
; CHECK-P9-NEXT:    lxv 6, 224(4)
; CHECK-P9-NEXT:    xvmuldp 1, 1, 0
; CHECK-P9-NEXT:    xvmuldp 4, 4, 0
; CHECK-P9-NEXT:    xvmuldp 3, 3, 0
; CHECK-P9-NEXT:    xvmuldp 5, 5, 0
; CHECK-P9-NEXT:    addi 4, 4, 256
; CHECK-P9-NEXT:    xvmuldp 6, 6, 0
; CHECK-P9-NEXT:    stxv 1, 16(3)
; CHECK-P9-NEXT:    stxv 2, 0(3)
; CHECK-P9-NEXT:    stxv 3, 48(3)
; CHECK-P9-NEXT:    stxv 4, 32(3)
; CHECK-P9-NEXT:    stxv 5, 240(3)
; CHECK-P9-NEXT:    stxv 6, 224(3)
; CHECK-P9-NEXT:    addi 3, 3, 256
; CHECK-P9-NEXT:    bdnz .LBB0_1
; CHECK-P9-NEXT:  # %bb.2: # %return.block
; CHECK-P9-NEXT:    blr
;
; CHECK-P9-NO-HEURISTIC-LABEL: test:
; CHECK-P9-NO-HEURISTIC:       # %bb.0: # %entry
; CHECK-P9-NO-HEURISTIC-NEXT:    ld 5, 0(5)
; CHECK-P9-NO-HEURISTIC-NEXT:    addis 6, 2, scalars@toc@ha
; CHECK-P9-NO-HEURISTIC-NEXT:    addi 6, 6, scalars@toc@l
; CHECK-P9-NO-HEURISTIC-NEXT:    rldicr 5, 5, 0, 58
; CHECK-P9-NO-HEURISTIC-NEXT:    addi 6, 6, 16
; CHECK-P9-NO-HEURISTIC-NEXT:    addi 5, 5, -32
; CHECK-P9-NO-HEURISTIC-NEXT:    lxvdsx 0, 0, 6
; CHECK-P9-NO-HEURISTIC-NEXT:    rldicl 5, 5, 59, 5
; CHECK-P9-NO-HEURISTIC-NEXT:    addi 5, 5, 1
; CHECK-P9-NO-HEURISTIC-NEXT:    mtctr 5
; CHECK-P9-NO-HEURISTIC-NEXT:    .p2align 4
; CHECK-P9-NO-HEURISTIC-NEXT:  .LBB0_1: # %vector.body
; CHECK-P9-NO-HEURISTIC-NEXT:    #
; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 1, 16(4)
; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 2, 0(4)
; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 3, 48(4)
; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 4, 32(4)
; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 2, 2, 0
; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 5, 240(4)
; CHECK-P9-NO-HEURISTIC-NEXT:    lxv 6, 224(4)
; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 1, 1, 0
; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 4, 4, 0
; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 3, 3, 0
; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 6, 6, 0
; CHECK-P9-NO-HEURISTIC-NEXT:    xvmuldp 5, 5, 0
; CHECK-P9-NO-HEURISTIC-NEXT:    addi 4, 4, 256
; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 1, 16(3)
; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 2, 0(3)
; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 3, 48(3)
; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 4, 32(3)
; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 5, 240(3)
; CHECK-P9-NO-HEURISTIC-NEXT:    stxv 6, 224(3)
; CHECK-P9-NO-HEURISTIC-NEXT:    addi 3, 3, 256
; CHECK-P9-NO-HEURISTIC-NEXT:    bdnz .LBB0_1
; CHECK-P9-NO-HEURISTIC-NEXT:  # %bb.2: # %return.block
; CHECK-P9-NO-HEURISTIC-NEXT:    blr
entry:
  %x_rvo_based_addr_3 = getelementptr inbounds [0 x %_elem_type_of_x], [0 x %_elem_type_of_x]* %.x, i64 0, i64 -1
  %a_rvo_based_addr_5 = getelementptr inbounds [0 x %_elem_type_of_a], [0 x %_elem_type_of_a]* %.a, i64 0, i64 -1
  %_val_n_ = load i64, i64* %.n, align 8
  %_val_c1_ = load double, double* getelementptr inbounds (%_type_of_scalars, %_type_of_scalars* @scalars, i64 0, i32 1), align 16
  %n.vec = and i64 %_val_n_, -32
  %broadcast.splatinsert26 = insertelement <4 x double> undef, double %_val_c1_, i32 0
  %broadcast.splat27 = shufflevector <4 x double> %broadcast.splatinsert26, <4 x double> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %offset.idx = or i64 %index, 1
  %0 = getelementptr %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_3, i64 %offset.idx, i32 0
  %1 = getelementptr %_elem_type_of_a, %_elem_type_of_a* %a_rvo_based_addr_5, i64 %offset.idx, i32 0
  %2 = bitcast double* %1 to <4 x double>*
  %wide.load = load <4 x double>, <4 x double>* %2, align 8
  %3 = getelementptr double, double* %1, i64 4
  %4 = bitcast double* %3 to <4 x double>*
  %wide.load19 = load <4 x double>, <4 x double>* %4, align 8
  %5 = getelementptr double, double* %1, i64 8
  %6 = bitcast double* %5 to <4 x double>*
  %wide.load20 = load <4 x double>, <4 x double>* %6, align 8
  %7 = getelementptr double, double* %1, i64 12
  %8 = bitcast double* %7 to <4 x double>*
  %wide.load21 = load <4 x double>, <4 x double>* %8, align 8
  %9 = getelementptr double, double* %1, i64 16
  %10 = bitcast double* %9 to <4 x double>*
  %wide.load22 = load <4 x double>, <4 x double>* %10, align 8
  %11 = getelementptr double, double* %1, i64 20
  %12 = bitcast double* %11 to <4 x double>*
  %wide.load23 = load <4 x double>, <4 x double>* %12, align 8
  %13 = getelementptr double, double* %1, i64 24
  %14 = bitcast double* %13 to <4 x double>*
  %wide.load24 = load <4 x double>, <4 x double>* %14, align 8
  %15 = getelementptr double, double* %1, i64 28
  %16 = bitcast double* %15 to <4 x double>*
  %wide.load25 = load <4 x double>, <4 x double>* %16, align 8
  %17 = fmul fast <4 x double> %wide.load, %broadcast.splat27
  %18 = fmul fast <4 x double> %wide.load19, %broadcast.splat27
  %19 = fmul fast <4 x double> %wide.load20, %broadcast.splat27
  %20 = fmul fast <4 x double> %wide.load21, %broadcast.splat27
  %21 = fmul fast <4 x double> %wide.load22, %broadcast.splat27
  %22 = fmul fast <4 x double> %wide.load23, %broadcast.splat27
  %23 = fmul fast <4 x double> %wide.load24, %broadcast.splat27
  %24 = fmul fast <4 x double> %wide.load25, %broadcast.splat27
  %25 = bitcast double* %0 to <4 x double>*
  store <4 x double> %17, <4 x double>* %25, align 8
  %26 = getelementptr double, double* %0, i64 4
  %27 = bitcast double* %26 to <4 x double>*
  store <4 x double> %18, <4 x double>* %27, align 8
  %28 = getelementptr double, double* %0, i64 8
  %29 = bitcast double* %28 to <4 x double>*
  %30 = getelementptr double, double* %0, i64 12
  %31 = bitcast double* %30 to <4 x double>*
  %32 = getelementptr double, double* %0, i64 16
  %33 = bitcast double* %32 to <4 x double>*
  %34 = getelementptr double, double* %0, i64 20
  %35 = bitcast double* %34 to <4 x double>*
  %36 = getelementptr double, double* %0, i64 24
  %37 = bitcast double* %36 to <4 x double>*
  %38 = getelementptr double, double* %0, i64 28
  %39 = bitcast double* %38 to <4 x double>*
  store <4 x double> %24, <4 x double>* %39, align 8
  %index.next = add i64 %index, 32
  %cm = icmp eq i64 %index.next, %n.vec
  br i1 %cm, label %return.block, label %vector.body

return.block:
  ret void
}