; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)

declare <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
declare <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 0, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot90_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 1, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot90_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot180_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot180_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #180
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 2, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot270_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot270_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 3, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #180
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}