; Test vector intrinsics added with z14.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s

; Intrinsics taking integer vectors and/or a pointer operand.
declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)

; <4 x float> intrinsics; the first four return a {vector, CC} pair.
declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>)
declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32)
declare <4 x float> @llvm.s390.vfisb(<4 x float>, i32, i32)

; Maximum/minimum intrinsics, each with a trailing i32 immediate.
declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32)
declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32)

; VBPERM on two vector arguments.
define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vbperm:
; CHECK: vbperm %v24, %v24, %v26
; CHECK: br %r14
  %perm = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
  ret <2 x i64> %perm
}

; VMSLG with immediate 0 (no shifts).
define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmslg1:
; CHECK: vmslg %v24, %v24, %v26, %v28, 0
; CHECK: br %r14
  %sum = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
  ret <16 x i8> %sum
}

; VMSLG with immediate 12 (both shifts).
define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmslg2:
; CHECK: vmslg %v24, %v24, %v26, %v28, 12
; CHECK: br %r14
  %sum = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
  ret <16 x i8> %sum
}

; VLRLR with the lowest in-range displacement.
define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
; CHECK-LABEL: test_vlrlr1:
; CHECK: vlrlr %v24, %r3, 0(%r2)
; CHECK: br %r14
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  ret <16 x i8> %loaded
}

; VLRLR at offset 4095, the highest displacement that is still in range.
define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
; CHECK-LABEL: test_vlrlr2:
; CHECK: vlrlr %v24, %r3, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%addr)
  ret <16 x i8> %loaded
}

; VLRLR at offset 4096, which is out of range: a zero displacement off some
; base register is expected instead.
define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
; CHECK-LABEL: test_vlrlr3:
; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4096
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%addr)
  ret <16 x i8> %loaded
}

; A variable index must not be folded into the VLRLR address.
define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
; CHECK-LABEL: test_vlrlr4:
; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%addr)
  ret <16 x i8> %loaded
}

; VLRL (constant length 0) at the lowest in-range displacement.
define <16 x i8> @test_vlrl1(i8 *%ptr) {
; CHECK-LABEL: test_vlrl1:
; CHECK: vlrl %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
  ret <16 x i8> %loaded
}

; VLRL at offset 4095, the highest displacement that is still in range.
define <16 x i8> @test_vlrl2(i8 *%base) {
; CHECK-LABEL: test_vlrl2:
; CHECK: vlrl %v24, 4095(%r2), 0
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%addr)
  ret <16 x i8> %loaded
}

; VLRL with an out-of-range displacement.
define <16 x i8> @test_vlrl3(i8 *%base) {
; CHECK-LABEL: test_vlrl3:
; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4096
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%addr)
  ret <16 x i8> %loaded
}

; A variable index must not be folded into the VLRL address.
define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
; CHECK-LABEL: test_vlrl4:
; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%addr)
  ret <16 x i8> %loaded
}

; A constant length of 15 or more should be emitted as a plain VL.
define <16 x i8> @test_vlrl5(i8 *%ptr) {
; CHECK-LABEL: test_vlrl5:
; CHECK: vl %v24, 0({{%r[1-5]}})
; CHECK: br %r14
  %loaded = call <16 x i8> @llvm.s390.vlrl(i32 15, i8 *%ptr)
  ret <16 x i8> %loaded
}

; VSTRLR (length in a register) at the lowest in-range displacement.
define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
; CHECK-LABEL: test_vstrlr1:
; CHECK: vstrlr %v24, %r3, 0(%r2)
; CHECK: br %r14
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
  ret void
}

; VSTRLR at offset 4095, the highest displacement that is still in range.
define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
; CHECK-LABEL: test_vstrlr2:
; CHECK: vstrlr %v24, %r3, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%addr)
  ret void
}

; VSTRLR at offset 4096, which is out of range: a zero displacement off some
; base register is expected instead.
define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
; CHECK-LABEL: test_vstrlr3:
; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4096
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%addr)
  ret void
}

; Check that VSTRLR doesn't allow an index.
define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
; CHECK-LABEL: test_vstrlr4:
; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%addr)
  ret void
}

; VSTRL (constant length 8) at the lowest in-range displacement.
define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
; CHECK-LABEL: test_vstrl1:
; CHECK: vstrl %v24, 0(%r2), 8
; CHECK: br %r14
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
  ret void
}

; VSTRL at offset 4095, the highest displacement that is still in range.
define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
; CHECK-LABEL: test_vstrl2:
; CHECK: vstrl %v24, 4095(%r2), 8
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%addr)
  ret void
}

; VSTRL at offset 4096, which is out of range: a zero displacement off some
; base register is expected instead.
define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
; CHECK-LABEL: test_vstrl3:
; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4096
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%addr)
  ret void
}

; A variable index must not be folded into the VSTRL address.
define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
; CHECK-LABEL: test_vstrl4:
; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%addr)
  ret void
}

; A constant length of 15 or more should be emitted as a plain VST.
define void @test_vstrl5(<16 x i8> %vec, i8 *%ptr) {
; CHECK-LABEL: test_vstrl5:
; CHECK: vst %v24, 0({{%r[1-5]}})
; CHECK: br %r14
  call void @llvm.s390.vstrl(<16 x i8> %vec, i32 15, i8 *%ptr)
  ret void
}

; VFCESBS with no processing of the result.
define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfcesbs:
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFCESBS, yielding 1 when at least one element compares equal (CC != 3).
define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfcesbs_any_bool:
; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochile %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %any = icmp ne i32 %cc, 3
  %flag = zext i1 %any to i32
  ret i32 %flag
}

; VFCESBS, writing 0 to %ptr when at least one element compares equal.
; The vector result is returned on both paths.
define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b, i32 *%ptr) {
; CHECK-LABEL: test_vfcesbs_any_store:
; CHECK-NOT: %r
; CHECK: vfcesbs %v24, %v24, %v26
; CHECK-NEXT: {{bor|bnler}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a, <4 x float> %b)
  %mask = extractvalue {<4 x i32>, i32} %pair, 0
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %any = icmp ule i32 %cc, 2
  br i1 %any, label %store, label %exit

store:
  store i32 0, i32 *%ptr
  br label %exit

exit:
  ret <4 x i32> %mask
}

; VFCHSBS with no processing of the result.
define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchsbs:
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFCHSBS, yielding 1 unless every element compares higher (CC >= 1).
define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchsbs_notall_bool:
; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochinhe %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %notall = icmp sge i32 %cc, 1
  %flag = zext i1 %notall to i32
  ret i32 %flag
}

; VFCHSBS, writing 0 to %ptr unless every element compares higher.
; The vector result is returned on both paths.
define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b, i32 *%ptr) {
; CHECK-LABEL: test_vfchsbs_notall_store:
; CHECK-NOT: %r
; CHECK: vfchsbs %v24, %v24, %v26
; CHECK-NEXT: {{bher|ber}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a, <4 x float> %b)
  %mask = extractvalue {<4 x i32>, i32} %pair, 0
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %notall = icmp ugt i32 %cc, 0
  br i1 %notall, label %store, label %exit

store:
  store i32 0, i32 *%ptr
  br label %exit

exit:
  ret <4 x i32> %mask
}

; VFCHESBS with no processing of the result.
define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchesbs:
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFCHESBS, yielding 1 when no element compares higher or equal (CC == 3).
define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfchesbs_none_bool:
; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
; CHECK: lhi %r2, 0
; CHECK: lochio %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, <4 x float> %b)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %none = icmp eq i32 %cc, 3
  %flag = zext i1 %none to i32
  ret i32 %flag
}

; VFCHESBS, writing 0 to %ptr when no element compares higher or equal.
; The vector result is returned on both paths.
define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b, i32 *%ptr) {
; CHECK-LABEL: test_vfchesbs_none_store:
; CHECK-NOT: %r
; CHECK: vfchesbs %v24, %v24, %v26
; CHECK-NEXT: {{bnor|bler}} %r14
; CHECK: mvhi 0(%r2), 0
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a, <4 x float> %b)
  %mask = extractvalue {<4 x i32>, i32} %pair, 0
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %none = icmp uge i32 %cc, 3
  br i1 %none, label %store, label %exit

store:
  store i32 0, i32 *%ptr
  br label %exit

exit:
  ret <4 x i32> %mask
}

; VFTCISB with the lowest useful class selector and no processing of the result.
define i32 @test_vftcisb(<4 x float> %a) {
; CHECK-LABEL: test_vftcisb:
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1
; CHECK: ipm %r2
; CHECK: srl %r2, 28
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  ret i32 %cc
}

; VFTCISB with the highest useful class selector (4094), yielding 1 when
; every element is in the selected class (CC == 0).
define i32 @test_vftcisb_all_bool(<4 x float> %a) {
; CHECK-LABEL: test_vftcisb_all_bool:
; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
; CHECK: lhi %r2, 0
; CHECK: lochie %r2, 1
; CHECK: br %r14
  %pair = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
  %cc = extractvalue {<4 x i32>, i32} %pair, 1
  %all = icmp eq i32 %cc, 0
  %flag = zext i1 %all to i32
  ret i32 %flag
}

; VFISB with operands (0, 4): a rounding mode that the standard intrinsics
; cannot express.
define <4 x float> @test_vfisb_0_4(<4 x float> %a) {
; CHECK-LABEL: test_vfisb_0_4:
; CHECK: vfisb %v24, %v24, 0, 4
; CHECK: br %r14
  %rounded = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4)
  ret <4 x float> %rounded
}

; VFISB with operands (4, 0): the IEEE-inexact exception is suppressed.
define <4 x float> @test_vfisb_4_0(<4 x float> %a) {
; CHECK-LABEL: test_vfisb_4_0:
; CHECK: vfisb %v24, %v24, 4, 0
; CHECK: br %r14
  %rounded = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0)
  ret <4 x float> %rounded
}

; VFMAXDB with immediate 4.
define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_vfmaxdb:
; CHECK: vfmaxdb %v24, %v24, %v26, 4
; CHECK: br %r14
  %max = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
  ret <2 x double> %max
}

; VFMINDB.
define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_vfmindb:
; CHECK: vfmindb %v24, %v24, %v26, 4
; CHECK: br %r14
  %min = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
  ret <2 x double> %min
}

; VFMAXSB with immediate 4.
define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfmaxsb:
; CHECK: vfmaxsb %v24, %v24, %v26, 4
; CHECK: br %r14
  %max = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4)
  ret <4 x float> %max
}

; VFMINSB with immediate 4.
define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vfminsb:
; CHECK: vfminsb %v24, %v24, %v26, 4
; CHECK: br %r14
  %min = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4)
  ret <4 x float> %min
}
