; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s

; This test case aims to test the vector multiply instructions on Power10.
; This includes the low order and high order versions of vector multiply.
; The low order version operates on doublewords, whereas the high order version
; operates on signed and unsigned words and doublewords.
; This file also includes 128 bit vector multiply instructions.

; Low-order doubleword multiply: a plain <2 x i64> mul must select vmulld.
define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulld:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulld v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %mul = mul <2 x i64> %b, %a
  ret <2 x i64> %mul
}

; High-order signed doubleword multiply: the sext-to-i128/mul/lshr-64/trunc
; idiom must be recognized and selected as a single vmulhsd.
define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <2 x i64> %a to <2 x i128>
  %1 = sext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

; High-order unsigned doubleword multiply: the zext-to-i128/mul/lshr-64/trunc
; idiom must be recognized and selected as a single vmulhud.
define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <2 x i64> %a to <2 x i128>
  %1 = zext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

; High-order signed word multiply: the sext-to-i64/mul/lshr-32/trunc idiom
; must be recognized and selected as a single vmulhsw.
define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <4 x i32> %a to <4 x i64>
  %1 = sext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

; High-order unsigned word multiply: the zext-to-i64/mul/lshr-32/trunc idiom
; must be recognized and selected as a single vmulhuw.
define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <4 x i32> %a to <4 x i64>
  %1 = zext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

; Test the vector multiply high intrinsics.
declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)

; Direct call of the vmulhsw intrinsic must lower to the vmulhsw instruction.
define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

; Direct call of the vmulhuw intrinsic must lower to the vmulhuw instruction.
define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

; Direct call of the vmulhsd intrinsic must lower to the vmulhsd instruction.
define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

; Direct call of the vmulhud intrinsic must lower to the vmulhud instruction.
define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

; 128-bit vector multiply and multiply-sum intrinsics.
declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone

; Even unsigned doubleword multiply to i128: must select vmuleud.
define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuleud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuleud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

; Odd unsigned doubleword multiply to i128: must select vmuloud.
define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuloud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuloud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

; Even signed doubleword multiply to i128: must select vmulesd.
define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulesd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulesd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

; Odd signed doubleword multiply to i128: must select vmulosd.
define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulosd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulosd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

; Unsigned doubleword multiply-sum with carry-out accumulation: must select vmsumcud.
define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
; CHECK-LABEL: test_vmsumcud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmsumcud v2, v2, v3, v4
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
  ret <1 x i128> %tmp
}