; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK,CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK,CHECK-PWR8 %s

; Codegen expectations for calls to @__powf4_massv on pwr9 and pwr8:
;   * by default the call is expected to be emitted as a branch to the
;     CPU-specific entry point (__powf4_P9 / __powf4_P8);
;   * when the exponent is a splat of 0.75 or 0.25 and the call carries the
;     fast-math flags used below, no such call is expected and xvrsqrtesp is
;     expected in the output instead.
; Both llc invocations enable the common check prefix in addition to their
; CPU-specific one, so the label, blr, xvrsqrtesp and negative checks below are
; verified for both CPUs.

; Exponent is a variable
define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_var
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %z, i64 %index
  %next.gep31 = getelementptr float, float* %y, i64 %index
  %next.gep32 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep32 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = bitcast float* %next.gep31 to <4 x float>*
  %wide.load33 = load <4 x float>, <4 x float>* %1, align 4
  %2 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> %wide.load33)
  %3 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %2, <4 x float>* %3, align 4
  %index.next = add i64 %index, 4
  %4 = icmp eq i64 %index.next, 1024
  br i1 %4, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat constant that is neither 0.75 nor 0.25
define void @vspow_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a non-splat constant vector; no lane is 0.75 or 0.25
define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_neq_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a non-splat constant vector: three lanes are 0.75, one is not
define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_neq075_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a non-splat constant vector: two lanes are 0.25, two are not
define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_neq025_const
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.75 and the call carries ninf afn
define void @vspow_075(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_075
; CHECK-NOT:         bl __powf4_P{{[89]}}
; CHECK:             xvrsqrtesp
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call ninf afn <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.25 and the call carries ninf afn nsz
define void @vspow_025(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_025
; CHECK-NOT:         bl __powf4_P{{[89]}}
; CHECK:             xvrsqrtesp
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call ninf afn nsz <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.75 but the call has no fast-math flags
define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_075_nofast
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK-NOT:         xvrsqrtesp
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Exponent is a splat of 0.25 but the call has no fast-math flags
define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x) {
; CHECK-LABEL:       @vspow_025_nofast
; CHECK-PWR9:        bl __powf4_P9
; CHECK-PWR8:        bl __powf4_P8
; CHECK-NOT:         xvrsqrtesp
; CHECK:             blr
entry:
  br label %vector.body

vector.body:
  %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
  %next.gep = getelementptr float, float* %y, i64 %index
  %next.gep19 = getelementptr float, float* %x, i64 %index
  %0 = bitcast float* %next.gep19 to <4 x float>*
  %wide.load = load <4 x float>, <4 x float>* %0, align 4
  %1 = call <4 x float> @__powf4_massv(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  %2 = bitcast float* %next.gep to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  %index.next = add i64 %index, 4
  %3 = icmp eq i64 %index.next, 1024
  br i1 %3, label %for.end, label %vector.body

for.end:
  ret void
}

; Vector pow entry point called by every function above; it is declared here
; without any function attributes.
declare <4 x float> @__powf4_massv(<4 x float>, <4 x float>)