; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE

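; This file exercises the PowerPC MMA (Matrix-Multiply Assist) intrinsics:
; assembling and disassembling accumulators and vector pairs, priming and
; unpriming with xxmtacc/xxmfacc, zeroing with xxsetaccz, the *ger* update
; forms, and the paired vector load/store intrinsics (lxvp/stxvp).
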
; assemble_acc
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_acc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    xxlor vs0, v2, v2
; CHECK-NEXT:    xxlor vs1, v3, v3
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs3, v3, v3
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: ass_acc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    xxlor vs0, v2, v2
; CHECK-BE-NEXT:    xxlor vs1, v3, v3
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs3, v3, v3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  store <512 x i1> %0, <512 x i1>* %ptr, align 64
  ret void
}

; assemble_pair
declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: ass_pair:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    stxv v2, 16(r3)
; CHECK-NEXT:    stxv v3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: ass_pair:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    stxv v2, 16(r3)
; CHECK-BE-NEXT:    stxv v2, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
  store <256 x i1> %0, <256 x i1>* %ptr, align 32
  ret void
}

; xxmtacc
declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmtacc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    xxlor vs0, v2, v2
; CHECK-NEXT:    xxlor vs1, v3, v3
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs3, v3, v3
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxmtacc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    xxlor vs0, v2, v2
; CHECK-BE-NEXT:    xxlor vs1, v3, v3
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs3, v3, v3
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is
; generated from the call to xxmtacc then one xxmfacc is generated for the store
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0)
  store <512 x i1> %1, <512 x i1>* %ptr, align 64
  ret void
}

; xxmfacc
declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)
define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-LABEL: int_xxmfacc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmr v3, v2
; CHECK-NEXT:    xxlor vs0, v2, v2
; CHECK-NEXT:    xxlor vs1, v3, v3
; CHECK-NEXT:    xxlor vs2, v2, v2
; CHECK-NEXT:    xxlor vs3, v3, v3
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxmfacc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vmr v3, v2
; CHECK-BE-NEXT:    xxlor vs0, v2, v2
; CHECK-BE-NEXT:    xxlor vs1, v3, v3
; CHECK-BE-NEXT:    xxlor vs2, v2, v2
; CHECK-BE-NEXT:    xxlor vs3, v3, v3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is
; generated from the call to xxmfacc then one xxmfacc is generated for the store
  %0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0)
  store <512 x i1> %1, <512 x i1>* %ptr, align 64
  ret void
}

; xxsetaccz
declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
define void @int_xxsetaccz(<512 x i1>* %ptr) {
; CHECK-LABEL: int_xxsetaccz:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: int_xxsetaccz:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, <512 x i1>* %ptr, align 64
  ret void
}

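; The disassemble tests below split an accumulator (or a VSR pair) back into
; individual <16 x i8> values; the CHECK and CHECK-BE lines differ in how the
; underlying VSX registers map to the extracted elements.
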
; disassemble_acc
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) {
; CHECK-LABEL: disass_acc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    stxv vs2, 0(r4)
; CHECK-NEXT:    stxv vs1, 0(r5)
; CHECK-NEXT:    stxv vs0, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: disass_acc:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs1, 0(r4)
; CHECK-BE-NEXT:    stxv vs2, 0(r5)
; CHECK-BE-NEXT:    stxv vs3, 0(r6)
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
  %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
  %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
  %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
  %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
  store <16 x i8> %2, <16 x i8>* %ptr1, align 16
  store <16 x i8> %3, <16 x i8>* %ptr2, align 16
  store <16 x i8> %4, <16 x i8>* %ptr3, align 16
  store <16 x i8> %5, <16 x i8>* %ptr4, align 16
  ret void
}

; disassemble_pair
declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
; CHECK-LABEL: disass_pair:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 0(r3)
; CHECK-NEXT:    lxv vs0, 16(r3)
; CHECK-NEXT:    stxv vs1, 0(r4)
; CHECK-NEXT:    stxv vs0, 0(r5)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: disass_pair:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r4)
; CHECK-BE-NEXT:    stxv vs1, 0(r5)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
  %1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
  %2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
  %3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
  store <16 x i8> %2, <16 x i8>* %ptr2, align 16
  store <16 x i8> %3, <16 x i8>* %ptr3, align 16
  ret void
}

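; testBranch merges an accumulator from two paths through a phi: on the load
; path the CHECK lines show the value being primed with xxmtacc before the
; xvi4ger8pp update, and the merged result is unprimed once with xxmfacc
; before it is stored.
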
declare <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1>, <16 x i8>, <16 x i8>)
define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-LABEL: testBranch:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmplwi r7, 0
; CHECK-NEXT:    beq cr0, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    b .LBB7_3
; CHECK-NEXT:  .LBB7_2: # %if.else
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvi4ger8pp acc0, v2, v2
; CHECK-NEXT:  .LBB7_3: # %if.end
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testBranch:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    cmplwi r7, 0
; CHECK-BE-NEXT:    beq cr0, .LBB7_2
; CHECK-BE-NEXT:  # %bb.1: # %if.then
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    b .LBB7_3
; CHECK-BE-NEXT:  .LBB7_2: # %if.else
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v2
; CHECK-BE-NEXT:  .LBB7_3: # %if.end
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    blr
entry:
  %tobool = icmp eq i32 %val, 0
  br i1 %tobool, label %if.else, label %if.then

if.then:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  br label %if.end

if.else:
  %1 = load <512 x i1>, <512 x i1>* %ptr, align 64
  %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  br label %if.end

if.end:
  %vq1.0 = phi <512 x i1> [ %0, %if.then ], [ %2, %if.else ]
  store <512 x i1> %vq1.0, <512 x i1>* %ptr, align 64
  ret void
}

; The following test cases check that the xxsetaccz instruction is correctly rematerialized
declare <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1>, <16 x i8>, <16 x i8>)

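; In testcse the two identical xxsetaccz+xvf32gerpp chains are CSE'd: the CHECK
; lines use a single accumulator and store the same vs0-vs3 at both offsets.
; In testcse3 one xxsetaccz result feeds two different GER updates, and the
; CHECK lines show xxsetaccz emitted once per accumulator instead of the value
; being copied between accumulators.
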
define void @testcse(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xvf32gerpp acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xvf32gerpp acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %2, <512 x i1>* %4, align 64
  store <512 x i1> %3, <512 x i1>* %5, align 64
  ret void
}

define void @testcse2(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs4, 48(r3)
; CHECK-NEXT:    stxv vs5, 32(r3)
; CHECK-NEXT:    stxv vs6, 16(r3)
; CHECK-NEXT:    stxv vs7, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs5, 16(r3)
; CHECK-BE-NEXT:    stxv vs4, 0(r3)
; CHECK-BE-NEXT:    stxv vs7, 48(r3)
; CHECK-BE-NEXT:    stxv vs6, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %vc, <16 x i8> %vc)
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %5 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %2, <512 x i1>* %4, align 64
  store <512 x i1> %3, <512 x i1>* %5, align 64
  ret void
}

define void @testcse3(<512 x i1>* %res, <16 x i8> %vc) {
; CHECK-LABEL: testcse3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs4, 48(r3)
; CHECK-NEXT:    stxv vs5, 32(r3)
; CHECK-NEXT:    stxv vs6, 16(r3)
; CHECK-NEXT:    stxv vs7, 0(r3)
; CHECK-NEXT:    stxv vs0, 112(r3)
; CHECK-NEXT:    stxv vs1, 96(r3)
; CHECK-NEXT:    stxv vs2, 80(r3)
; CHECK-NEXT:    stxv vs3, 64(r3)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    xvf32gerpn acc0, v2, v2
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs5, 16(r3)
; CHECK-BE-NEXT:    stxv vs4, 0(r3)
; CHECK-BE-NEXT:    stxv vs7, 48(r3)
; CHECK-BE-NEXT:    stxv vs6, 32(r3)
; CHECK-BE-NEXT:    stxv vs1, 80(r3)
; CHECK-BE-NEXT:    stxv vs0, 64(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r3)
; CHECK-BE-NEXT:    stxv vs2, 96(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %2 = call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %3 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 0
  %4 = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 1
  store <512 x i1> %1, <512 x i1>* %3, align 64
  store <512 x i1> %2, <512 x i1>* %4, align 64
  ret void
}

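; testcse4 exercises the same pattern inside a loop: three accumulators are
; zeroed, updated with different GER forms, and stored on every iteration, so
; the CHECK lines show three xxsetaccz/xxmfacc sequences per loop body.
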
define void @testcse4(<512 x i1>* %res, i32 %lim, <16 x i8>* %vc) {
; CHECK-LABEL: testcse4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpwi r4, 1
; CHECK-NEXT:    bltlr cr0
; CHECK-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-NEXT:    clrldi r4, r4, 32
; CHECK-NEXT:    li r6, 0
; CHECK-NEXT:    mtctr r4
; CHECK-NEXT:    li r4, 0
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  .LBB11_2: # %for.body
; CHECK-NEXT:    #
; CHECK-NEXT:    rldic r7, r6, 4, 28
; CHECK-NEXT:    addi r6, r6, 6
; CHECK-NEXT:    xxsetaccz acc2
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    lxvx vs0, r5, r7
; CHECK-NEXT:    add r7, r5, r7
; CHECK-NEXT:    lxv vs1, 16(r7)
; CHECK-NEXT:    xvf32gerpp acc2, vs0, vs1
; CHECK-NEXT:    lxv vs0, 32(r7)
; CHECK-NEXT:    lxv vs1, 48(r7)
; CHECK-NEXT:    xxmfacc acc2
; CHECK-NEXT:    xvf32gerpn acc1, vs0, vs1
; CHECK-NEXT:    lxv vs12, 64(r7)
; CHECK-NEXT:    lxv vs13, 80(r7)
; CHECK-NEXT:    rldic r7, r4, 6, 26
; CHECK-NEXT:    addi r4, r4, 3
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    xvf32gernp acc0, vs12, vs13
; CHECK-NEXT:    stxvx vs11, r3, r7
; CHECK-NEXT:    add r7, r3, r7
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs8, 48(r7)
; CHECK-NEXT:    stxv vs9, 32(r7)
; CHECK-NEXT:    stxv vs10, 16(r7)
; CHECK-NEXT:    stxv vs4, 112(r7)
; CHECK-NEXT:    stxv vs5, 96(r7)
; CHECK-NEXT:    stxv vs6, 80(r7)
; CHECK-NEXT:    stxv vs7, 64(r7)
; CHECK-NEXT:    stxv vs0, 176(r7)
; CHECK-NEXT:    stxv vs1, 160(r7)
; CHECK-NEXT:    stxv vs2, 144(r7)
; CHECK-NEXT:    stxv vs3, 128(r7)
; CHECK-NEXT:    bdnz .LBB11_2
; CHECK-NEXT:  # %bb.3: # %for.cond.cleanup
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testcse4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    cmpwi r4, 1
; CHECK-BE-NEXT:    bltlr cr0
; CHECK-BE-NEXT:  # %bb.1: # %for.body.preheader
; CHECK-BE-NEXT:    clrldi r4, r4, 32
; CHECK-BE-NEXT:    li r6, 0
; CHECK-BE-NEXT:    mtctr r4
; CHECK-BE-NEXT:    li r4, 0
; CHECK-BE-NEXT:    .p2align 4
; CHECK-BE-NEXT:  .LBB11_2: # %for.body
; CHECK-BE-NEXT:    #
; CHECK-BE-NEXT:    rldic r7, r6, 4, 28
; CHECK-BE-NEXT:    addi r6, r6, 6
; CHECK-BE-NEXT:    xxsetaccz acc2
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    lxvx vs0, r5, r7
; CHECK-BE-NEXT:    add r7, r5, r7
; CHECK-BE-NEXT:    lxv vs1, 16(r7)
; CHECK-BE-NEXT:    xvf32gerpp acc2, vs0, vs1
; CHECK-BE-NEXT:    lxv vs0, 32(r7)
; CHECK-BE-NEXT:    lxv vs1, 48(r7)
; CHECK-BE-NEXT:    xxmfacc acc2
; CHECK-BE-NEXT:    xvf32gerpn acc1, vs0, vs1
; CHECK-BE-NEXT:    lxv vs12, 64(r7)
; CHECK-BE-NEXT:    lxv vs13, 80(r7)
; CHECK-BE-NEXT:    rldic r7, r4, 6, 26
; CHECK-BE-NEXT:    addi r4, r4, 3
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    xvf32gernp acc0, vs12, vs13
; CHECK-BE-NEXT:    stxvx vs8, r3, r7
; CHECK-BE-NEXT:    add r7, r3, r7
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs9, 16(r7)
; CHECK-BE-NEXT:    stxv vs11, 48(r7)
; CHECK-BE-NEXT:    stxv vs10, 32(r7)
; CHECK-BE-NEXT:    stxv vs5, 80(r7)
; CHECK-BE-NEXT:    stxv vs4, 64(r7)
; CHECK-BE-NEXT:    stxv vs7, 112(r7)
; CHECK-BE-NEXT:    stxv vs6, 96(r7)
; CHECK-BE-NEXT:    stxv vs1, 144(r7)
; CHECK-BE-NEXT:    stxv vs0, 128(r7)
; CHECK-BE-NEXT:    stxv vs3, 176(r7)
; CHECK-BE-NEXT:    stxv vs2, 160(r7)
; CHECK-BE-NEXT:    bdnz .LBB11_2
; CHECK-BE-NEXT:  # %bb.3: # %for.cond.cleanup
; CHECK-BE-NEXT:    blr
entry:
  %cmp55 = icmp sgt i32 %lim, 0
  br i1 %cmp55, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %lim to i64
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.preheader
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %1 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %2 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  %3 = trunc i64 %indvars.iv to i32
  %mul = mul nsw i32 %3, 6
  %idxprom = zext i32 %mul to i64
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom
  %4 = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %add2 = or i32 %mul, 1
  %idxprom3 = zext i32 %add2 to i64
  %arrayidx4 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom3
  %5 = load <16 x i8>, <16 x i8>* %arrayidx4, align 16
  %6 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %4, <16 x i8> %5)
  %add6 = add nuw nsw i32 %mul, 2
  %idxprom7 = zext i32 %add6 to i64
  %arrayidx8 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom7
  %7 = load <16 x i8>, <16 x i8>* %arrayidx8, align 16
  %add10 = add nuw nsw i32 %mul, 3
  %idxprom11 = zext i32 %add10 to i64
  %arrayidx12 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom11
  %8 = load <16 x i8>, <16 x i8>* %arrayidx12, align 16
  %9 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpn(<512 x i1> %1, <16 x i8> %7, <16 x i8> %8)
  %add14 = add nuw nsw i32 %mul, 4
  %idxprom15 = zext i32 %add14 to i64
  %arrayidx16 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom15
  %10 = load <16 x i8>, <16 x i8>* %arrayidx16, align 16
  %add18 = add nuw nsw i32 %mul, 5
  %idxprom19 = zext i32 %add18 to i64
  %arrayidx20 = getelementptr inbounds <16 x i8>, <16 x i8>* %vc, i64 %idxprom19
  %11 = load <16 x i8>, <16 x i8>* %arrayidx20, align 16
  %12 = tail call <512 x i1> @llvm.ppc.mma.xvf32gernp(<512 x i1> %2, <16 x i8> %10, <16 x i8> %11)
  %mul21 = mul i64 %indvars.iv, 3
  %idx.ext = and i64 %mul21, 4294967295
  %add.ptr = getelementptr inbounds <512 x i1>, <512 x i1>* %res, i64 %idx.ext
  store <512 x i1> %6, <512 x i1>* %add.ptr, align 64
  %add.ptr26 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 1
  store <512 x i1> %9, <512 x i1>* %add.ptr26, align 64
  %add.ptr30 = getelementptr inbounds <512 x i1>, <512 x i1>* %add.ptr, i64 2
  store <512 x i1> %12, <512 x i1>* %add.ptr30, align 64
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}

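; testRedundantPrimeUnprime stores the zeroed accumulator and also feeds it to
; xvf32gerpp across a call. The CHECK lines show the two uses materialized as
; separate accumulators (acc0 and acc1) rather than re-priming a single value,
; with the second result spilled and reloaded around the call via stxvp/lxvp.
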
declare i32 @testRedundantPrimeUnprimeF()
define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind {
; CHECK-LABEL: testRedundantPrimeUnprime:
; CHECK:         .localentry testRedundantPrimeUnprime, 1
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -112(r1)
; CHECK-NEXT:    xxsetaccz acc0
; CHECK-NEXT:    xxsetaccz acc1
; CHECK-NEXT:    mr r30, r3
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r3)
; CHECK-NEXT:    stxv vs1, 32(r3)
; CHECK-NEXT:    stxv vs2, 16(r3)
; CHECK-NEXT:    stxv vs3, 0(r3)
; CHECK-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-NEXT:    li r3, 64
; CHECK-NEXT:    xxmfacc acc1
; CHECK-NEXT:    stxvp vsp4, r1(r3)
; CHECK-NEXT:    li r3, 32
; CHECK-NEXT:    stxvp vsp6, r1(r3)
; CHECK-NEXT:    bl testRedundantPrimeUnprimeF@notoc
; CHECK-NEXT:    li r3, 64
; CHECK-NEXT:    lxvp vsp0, r1(r3)
; CHECK-NEXT:    li r3, 32
; CHECK-NEXT:    lxvp vsp2, r1(r3)
; CHECK-NEXT:    stxv vs0, 112(r30)
; CHECK-NEXT:    stxv vs1, 96(r30)
; CHECK-NEXT:    stxv vs2, 80(r30)
; CHECK-NEXT:    stxv vs3, 64(r30)
; CHECK-NEXT:    addi r1, r1, 112
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testRedundantPrimeUnprime:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mflr r0
; CHECK-BE-NEXT:    std r0, 16(r1)
; CHECK-BE-NEXT:    stdu r1, -192(r1)
; CHECK-BE-NEXT:    xxsetaccz acc0
; CHECK-BE-NEXT:    xxsetaccz acc1
; CHECK-BE-NEXT:    std r30, 176(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT:    mr r30, r3
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    xvf32gerpp acc1, v2, v2
; CHECK-BE-NEXT:    li r3, 112
; CHECK-BE-NEXT:    xxmfacc acc1
; CHECK-BE-NEXT:    stxvp vsp4, r1(r3)
; CHECK-BE-NEXT:    li r3, 144
; CHECK-BE-NEXT:    stxvp vsp6, r1(r3)
; CHECK-BE-NEXT:    bl testRedundantPrimeUnprimeF
; CHECK-BE-NEXT:    nop
; CHECK-BE-NEXT:    li r3, 112
; CHECK-BE-NEXT:    lxvp vsp0, r1(r3)
; CHECK-BE-NEXT:    li r3, 144
; CHECK-BE-NEXT:    lxvp vsp2, r1(r3)
; CHECK-BE-NEXT:    stxv vs3, 112(r30)
; CHECK-BE-NEXT:    stxv vs2, 96(r30)
; CHECK-BE-NEXT:    stxv vs1, 80(r30)
; CHECK-BE-NEXT:    stxv vs0, 64(r30)
; CHECK-BE-NEXT:    ld r30, 176(r1) # 8-byte Folded Reload
; CHECK-BE-NEXT:    addi r1, r1, 192
; CHECK-BE-NEXT:    ld r0, 16(r1)
; CHECK-BE-NEXT:    mtlr r0
; CHECK-BE-NEXT:    blr
entry:
  %0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
  store <512 x i1> %0, <512 x i1>* %dst, align 64
  %1 = tail call <512 x i1> @llvm.ppc.mma.xvf32gerpp(<512 x i1> %0, <16 x i8> %vc, <16 x i8> %vc)
  %call = tail call signext i32 bitcast (i32 ()* @testRedundantPrimeUnprimeF to i32 ()*)()
  %add.ptr1 = getelementptr inbounds <512 x i1>, <512 x i1>* %dst, i64 1
  store <512 x i1> %1, <512 x i1>* %add.ptr1, align 64
  ret void
}

declare <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1>, <256 x i1>, <16 x i8>, i32, i32)
declare <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1>, <256 x i1>, <16 x i8>)

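; The test_ldst_* functions below check instruction selection for the
; llvm.ppc.mma.lxvp/stxvp intrinsics: the D-form lxvp/stxvp is used when the
; offset is an immediate the DQ form can encode (a multiple of 16 that fits in
; 16 bits), and the indexed lxvpx/stxvpx is used otherwise (see the FIXME on
; test_ldst_7 about a prefixed plxvp being expected for larger offsets).
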
; Function Attrs: nounwind
define void @test_ldst_1(<256 x i1>* %vpp, <256 x i1>* %vp2) {
; CHECK-LABEL: test_ldst_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvp vsp0, 0(r3)
; CHECK-NEXT:    stxvp vsp0, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvp vsp0, 0(r3)
; CHECK-BE-NEXT:    stxvp vsp0, 0(r4)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %0)
  %2 = bitcast <256 x i1>* %vp2 to i8*
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %1, i8* %2)
  ret void
}

; Function Attrs: argmemonly nounwind readonly
declare <256 x i1> @llvm.ppc.mma.lxvp(i8*)

; Function Attrs: argmemonly nounwind writeonly
declare void @llvm.ppc.mma.stxvp(<256 x i1>, i8*)

; Function Attrs: nounwind
define void @test_ldst_2(<256 x i1>* %vpp, i64 %offset, <256 x i1>* %vp2)  {
; CHECK-LABEL: test_ldst_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvpx vsp0, r3, r4
; CHECK-NEXT:    stxvpx vsp0, r5, r4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r4
; CHECK-BE-NEXT:    stxvpx vsp0, r5, r4
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 %offset
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 %offset
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_3(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
; CHECK-LABEL: test_ldst_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 18
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 18
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 18
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 18
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_4(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
; CHECK-LABEL: test_ldst_4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 1
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 1
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 1
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 1
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_5(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
; CHECK-LABEL: test_ldst_5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 42
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_5:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 42
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 42
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 42
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_6(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
; CHECK-LABEL: test_ldst_6:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxvp vsp0, 4096(r3)
; CHECK-NEXT:    stxvp vsp0, 4096(r4)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_6:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxvp vsp0, 4096(r3)
; CHECK-BE-NEXT:    stxvp vsp0, 4096(r4)
; CHECK-BE-NEXT:    blr
entry:
  %0 = getelementptr <256 x i1>, <256 x i1>* %vpp, i64 128
  %1 = bitcast <256 x i1>* %0 to i8*
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = getelementptr <256 x i1>, <256 x i1>* %vp2, i64 128
  %4 = bitcast <256 x i1>* %3 to i8*
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

; Function Attrs: nounwind
define void @test_ldst_7(<256 x i1>* %vpp, <256 x i1>* %vp2)  {
; FIXME: A prefixed load (plxvp) is expected here as the offset in this
; test case is a constant that fits within 34-bits.
; CHECK-LABEL: test_ldst_7:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 0
; CHECK-NEXT:    ori r5, r5, 32799
; CHECK-NEXT:    lxvpx vsp0, r3, r5
; CHECK-NEXT:    stxvpx vsp0, r4, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_7:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r5, 0
; CHECK-BE-NEXT:    ori r5, r5, 32799
; CHECK-BE-NEXT:    lxvpx vsp0, r3, r5
; CHECK-BE-NEXT:    stxvpx vsp0, r4, r5
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast <256 x i1>* %vpp to i8*
  %1 = getelementptr i8, i8* %0, i64 32799
  %2 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %1)
  %3 = bitcast <256 x i1>* %vp2 to i8*
  %4 = getelementptr i8, i8* %3, i64 32799
  tail call void @llvm.ppc.mma.stxvp(<256 x i1> %2, i8* %4)
  ret void
}

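; test_ldst_8 through test_ldst_10 feed the result of the lxvp intrinsic into
; MMA instructions (pmxvf64gernn / xvf64gernp), checking that the paired load
; is selected alongside the usual xxmtacc/xxmfacc priming of the accumulator.
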
; Function Attrs: nofree nounwind
define void @test_ldst_8(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
; CHECK-LABEL: test_ldst_8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    li r3, 8
; CHECK-NEXT:    lxvpx vsp4, r4, r3
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    pmxvf64gernn acc0, vsp4, v2, 0, 0
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_8:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    li r3, 8
; CHECK-BE-NEXT:    lxvpx vsp4, r4, r3
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    pmxvf64gernn acc0, vsp4, v2, 0, 0
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = bitcast <256 x i1>* %vpp to i8*
  %3 = getelementptr i8, i8* %2, i64 8
  %4 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %3)
  %5 = tail call <512 x i1> @llvm.ppc.mma.pmxvf64gernn(<512 x i1> %1, <256 x i1> %4, <16 x i8> %vc, i32 0, i32 0)
  %6 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %5, <512 x i1>* %6, align 64
  ret void
}

; Function Attrs: nofree nounwind
define void @test_ldst_9(i8* nocapture readonly %vqp, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
; CHECK-LABEL: test_ldst_9:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    lxvp vsp4, 0(r4)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvf64gernp acc0, vsp4, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r7)
; CHECK-NEXT:    stxv vs1, 32(r7)
; CHECK-NEXT:    stxv vs2, 16(r7)
; CHECK-NEXT:    stxv vs3, 0(r7)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_9:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp4, 0(r4)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvf64gernp acc0, vsp4, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r7)
; CHECK-BE-NEXT:    stxv vs0, 0(r7)
; CHECK-BE-NEXT:    stxv vs3, 48(r7)
; CHECK-BE-NEXT:    stxv vs2, 32(r7)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = bitcast <256 x i1>* %vpp to i8*
  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
  %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
  %5 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %4, <512 x i1>* %5, align 64
  ret void
}

; Function Attrs: nofree nounwind
define void @test_ldst_10(i8* nocapture readonly %vqp, i64 %offs, <256 x i1>* %vpp, <16 x i8> %vc, i8* nocapture %resp)  {
; CHECK-LABEL: test_ldst_10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv vs1, 32(r3)
; CHECK-NEXT:    lxv vs0, 48(r3)
; CHECK-NEXT:    lxv vs3, 0(r3)
; CHECK-NEXT:    lxv vs2, 16(r3)
; CHECK-NEXT:    lxvp vsp4, 0(r5)
; CHECK-NEXT:    xxmtacc acc0
; CHECK-NEXT:    xvf64gernp acc0, vsp4, v2
; CHECK-NEXT:    xxmfacc acc0
; CHECK-NEXT:    stxv vs0, 48(r9)
; CHECK-NEXT:    stxv vs1, 32(r9)
; CHECK-NEXT:    stxv vs2, 16(r9)
; CHECK-NEXT:    stxv vs3, 0(r9)
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: test_ldst_10:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs1, 16(r3)
; CHECK-BE-NEXT:    lxv vs0, 0(r3)
; CHECK-BE-NEXT:    lxv vs3, 48(r3)
; CHECK-BE-NEXT:    lxv vs2, 32(r3)
; CHECK-BE-NEXT:    lxvp vsp4, 0(r5)
; CHECK-BE-NEXT:    xxmtacc acc0
; CHECK-BE-NEXT:    xvf64gernp acc0, vsp4, v2
; CHECK-BE-NEXT:    xxmfacc acc0
; CHECK-BE-NEXT:    stxv vs1, 16(r9)
; CHECK-BE-NEXT:    stxv vs0, 0(r9)
; CHECK-BE-NEXT:    stxv vs3, 48(r9)
; CHECK-BE-NEXT:    stxv vs2, 32(r9)
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i8* %vqp to <512 x i1>*
  %1 = load <512 x i1>, <512 x i1>* %0, align 64
  %2 = bitcast <256 x i1>* %vpp to i8*
  %3 = tail call <256 x i1> @llvm.ppc.mma.lxvp(i8* %2)
  %4 = tail call <512 x i1> @llvm.ppc.mma.xvf64gernp(<512 x i1> %1, <256 x i1> %3, <16 x i8> %vc)
  %5 = bitcast i8* %resp to <512 x i1>*
  store <512 x i1> %4, <512 x i1>* %5, align 64
  ret void
}