; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
; RUN:     < %s -verify-machineinstrs | FileCheck %s

; Every function in this file selects between two vector operands based on a
; single scalar comparison (icmp eq / fcmp oeq). The assertions pin the exact
; instruction sequence llc emits for each combination of compare type and
; vector type; regenerate them with the script named above rather than editing
; them by hand.

; i8 equality compare choosing between two <8 x i8> vectors. Expected code:
; both scalars are moved into vector registers (fmov), compared with cmeq on
; the .8b arrangement, lane 0 of the mask is broadcast (dup), then bif blends.
define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s2, w1
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    cmeq v2.8b, v3.8b, v2.8b
; CHECK-NEXT:    dup v2.8b, v2.b[0]
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i8 %a, %b
  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
  ret <8x i8> %e
}

; float ordered-equal compare choosing between two <8 x i8> vectors. Expected
; code: fcmeq on the .2s arrangement, dup of lane s[0], then bsl on .8b.
define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    dup v0.2s, v0.s[0]
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
  ret <8x i8> %e
}

; double ordered-equal compare choosing between two <8 x i8> vectors. Expected
; code: a scalar fcmeq on d registers feeds bsl directly; no dup is emitted.
define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq d0, d0, d1
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
  ret <8x i8> %e
}

; i8 equality compare choosing between two <16 x i8> vectors. Expected code
; mirrors the <8 x i8> case but uses the .16b arrangement throughout.
define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s2, w1
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    cmeq v2.16b, v3.16b, v2.16b
; CHECK-NEXT:    dup v2.16b, v2.b[0]
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i8 %a, %b
  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
  ret <16x i8> %e
}

; float ordered-equal compare choosing between two <16 x i8> vectors. Expected
; code: fcmeq on .4s, dup of lane s[0], then bsl on .16b.
define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
  ret <16x i8> %e
}

; double ordered-equal compare choosing between two <16 x i8> vectors. Expected
; code: fcmeq on .2d, dup of lane d[0], then bsl on .16b.
define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
  ret <16x i8> %e
}

; i16 equality compare choosing between two <4 x i16> vectors. Expected code:
; fmov of both scalars, cmeq on .4h, dup of lane h[0], then bif on .8b.
define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s2, w1
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    cmeq v2.4h, v3.4h, v2.4h
; CHECK-NEXT:    dup v2.4h, v2.h[0]
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i16 %a, %b
  %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d
  ret <4x i16> %e
}

; i16 equality compare choosing between two <8 x i16> vectors. Expected code:
; fmov of both scalars, cmeq on .8h, dup of lane h[0], then bif on .16b.
define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s2, w1
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    cmeq v2.8h, v3.8h, v2.8h
; CHECK-NEXT:    dup v2.8h, v2.h[0]
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i16 %a, %b
  %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d
  ret <8x i16> %e
}

; i32 equality compare choosing between two <2 x i32> vectors. Expected code:
; fmov of both scalars, cmeq on .2s, dup of lane s[0], then bif on .8b.
define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s2, w1
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    cmeq v2.2s, v3.2s, v2.2s
; CHECK-NEXT:    dup v2.2s, v2.s[0]
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i32 %a, %b
  %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d
  ret <2x i32> %e
}

; i32 equality compare choosing between two <4 x i32> vectors. Expected code:
; fmov of both scalars, cmeq on .4s, dup of lane s[0], then bif on .16b.
define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s2, w1
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    cmeq v2.4s, v3.4s, v2.4s
; CHECK-NEXT:    dup v2.4s, v2.s[0]
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i32 %a, %b
  %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d
  ret <4x i32> %e
}

; i64 equality compare choosing between two <1 x i64> vectors. Expected code:
; fmov into d registers, a scalar cmeq on d registers, then bif on .8b; no dup
; is emitted for the single-element vector.
define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d2, x1
; CHECK-NEXT:    fmov d3, x0
; CHECK-NEXT:    cmeq d2, d3, d2
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i64 %a, %b
  %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d
  ret <1x i64> %e
}

; i64 equality compare choosing between two <2 x i64> vectors. Expected code:
; fmov into d registers, cmeq on .2d, dup of lane d[0], then bif on .16b.
define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d2, x1
; CHECK-NEXT:    fmov d3, x0
; CHECK-NEXT:    cmeq v2.2d, v3.2d, v2.2d
; CHECK-NEXT:    dup v2.2d, v2.d[0]
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i64 %a, %b
  %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d
  ret <2x i64> %e
}

; float ordered-equal compare choosing between two <1 x float> vectors.
; Expected code: fcmeq on .2s feeding bsl directly; no dup is emitted.
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
  ret <1 x float> %e
}

; float ordered-equal compare choosing between two <2 x float> vectors.
; Expected code: fcmeq on .2s, dup of lane s[0], then bsl on .8b.
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    dup v0.2s, v0.s[0]
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
  ret <2 x float> %e
}

; float ordered-equal compare choosing between two <4 x float> vectors.
; Expected code: fcmeq on .4s, dup of lane s[0], then bsl on .16b.
define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <4x float> %c, <4x float> %d
  ret <4x float> %e
}

; Integer (i32) equality compare choosing between two <4 x float> vectors.
; Expected code matches the <4 x i32> integer case: fmov, cmeq on .4s, dup of
; lane s[0], then bif on .16b.
define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s2, w1
; CHECK-NEXT:    fmov s3, w0
; CHECK-NEXT:    cmeq v2.4s, v3.4s, v2.4s
; CHECK-NEXT:    dup v2.4s, v2.s[0]
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i32 %a, %b
  %e = select i1 %cmp31, <4x float> %c, <4x float> %d
  ret <4x float> %e
}

; double ordered-equal compare choosing between two <1 x double> vectors.
; Expected code: a scalar fcmeq on d registers feeding bsl; no dup is emitted.
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq d0, d0, d1
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
  ret <1 x double> %e
}

; Integer (i64) equality compare choosing between two <1 x double> vectors.
; Expected code matches the <1 x i64> integer case: fmov into d registers, a
; scalar cmeq, then bif on .8b.
define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) {
; CHECK-LABEL: test_select_cc_v1f64_icmp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d2, x1
; CHECK-NEXT:    fmov d3, x0
; CHECK-NEXT:    cmeq d2, d3, d2
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %cmp31 = icmp eq i64 %a, %b
  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
  ret <1 x double> %e
}

; double ordered-equal compare choosing between two <2 x double> vectors.
; Expected code: fcmeq on .2d, dup of lane d[0], then bsl on .16b.
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
  ret <2 x double> %e
}

; Special case: when the select condition is an icmp with i1 operands, don't
; do the comparison on vectors.
; Part of PR21549.
; Expected code: the condition is evaluated with scalar tst/csetm, the
; resulting mask is broadcast from the general-purpose register with dup, and
; bif performs the blend.
define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.2s, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %cmp = icmp ne i1 %cc, 0
  %e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
  ret <2 x i32> %e
}

; Also make sure we support irregular/non-power-of-2 types such as v3f32.
; float ordered-equal compare choosing between two <3 x float> vectors. The
; expected code operates on full 128-bit registers: fcmeq on .4s, dup of lane
; s[0], then bif on .16b.
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
; CHECK-NEXT:    fcmeq v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    dup v2.4s, v2.s[0]
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %cc = fcmp oeq float %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

; double ordered-equal compare choosing between two <3 x float> vectors. The
; expected code does the compare on .2d, broadcasts lane d[0] with dup, then
; blends with bif on .16b.
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    fcmeq v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    dup v2.2d, v2.d[0]
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %cc = fcmp oeq double %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

; Attribute group #0 is referenced by the two <3 x float> functions above.
attributes #0 = { nounwind}