; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s

; This test case aims to test the vector multiply instructions on Power10.
; This includes the low order and high order versions of vector multiply.
; The low order version operates on doublewords, whereas the high order version
; operates on signed and unsigned words and doublewords.
; This file also includes 128 bit vector multiply instructions.
;
; Both invocations above (little- and big-endian triples) share the same
; check prefix, so the expected assembly must be identical for both.

; Low order multiply: a plain <2 x i64> mul is expected to become one vmulld.
define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulld:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulld v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %mul = mul <2 x i64> %b, %a
  ret <2 x i64> %mul
}

; High order signed doubleword multiply, written as a pattern: sign-extend both
; operands to i128, multiply, shift right by 64 and truncate back to i64.
; The whole sequence is expected to become one vmulhsd.
define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <2 x i64> %a to <2 x i128>
  %1 = sext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

; High order unsigned doubleword multiply: same pattern as above but with
; zero-extension; expected to become one vmulhud.
define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <2 x i64> %a to <2 x i128>
  %1 = zext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

; High order signed word multiply: sign-extend <4 x i32> to i64, multiply,
; shift right by 32 and truncate; expected to become one vmulhsw.
define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <4 x i32> %a to <4 x i64>
  %1 = sext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

; High order unsigned word multiply: zero-extended variant of the pattern
; above; expected to become one vmulhuw.
define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <4 x i32> %a to <4 x i64>
  %1 = zext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

; Test the vector multiply high intrinsics.
; Unlike the pattern-based tests above, these call the intrinsics directly and
; the expected output keeps the operands in source order (v2, v3).
declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)

define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

; Test the 128 bit vector multiply intrinsics. Each takes two <2 x i64>
; operands and yields a <1 x i128> result; vmsumcud additionally takes a
; <1 x i128> operand, which the expected output places in v4.
declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone

define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuleud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuleud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuloud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuloud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulesd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulesd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulosd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulosd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
; CHECK-LABEL: test_vmsumcud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmsumcud v2, v2, v3, v4
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
  ret <1 x i128> %tmp
}