; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
; RUN:   < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s

; Each function below selects between two vector values on a single scalar
; comparison. The expected code builds a lane mask from the compare, broadcasts
; lane 0 with dup where the vector has more than one lane, and blends with bsl.

; <8 x i8> select on an i8 equality: operands are moved from w0/w1 first.
define <8 x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %cmp31 = icmp eq i8 %a, %b
  %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %e
}

; <8 x i8> select on an f32 ordered-equal compare.
define <8 x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %e
}

; <8 x i8> select on an f64 ordered-equal compare: the scalar d-register mask
; feeds bsl directly, with no dup expected.
define <8 x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: test_select_cc_v8i8_f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <8 x i8> %c, <8 x i8> %d
  ret <8 x i8> %e
}

; <16 x i8> select on an i8 equality.
define <16 x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_i8:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cmp31 = icmp eq i8 %a, %b
  %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %e
}

; <16 x i8> select on an f32 ordered-equal compare.
define <16 x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %e
}

; <16 x i8> select on an f64 ordered-equal compare.
define <16 x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: test_select_cc_v16i8_f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <16 x i8> %c, <16 x i8> %d
  ret <16 x i8> %e
}

; <4 x i16> select on an i16 equality.
define <4 x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: test_select_cc_v4i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %cmp31 = icmp eq i16 %a, %b
  %e = select i1 %cmp31, <4 x i16> %c, <4 x i16> %d
  ret <4 x i16> %e
}

; <8 x i16> select on an i16 equality.
define <8 x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8 x i16> %c, <8 x i16> %d) {
; CHECK-LABEL: test_select_cc_v8i16:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cmp31 = icmp eq i16 %a, %b
  %e = select i1 %cmp31, <8 x i16> %c, <8 x i16> %d
  ret <8 x i16> %e
}

; <2 x i32> select on an i32 equality.
define <2 x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: test_select_cc_v2i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
  %cmp31 = icmp eq i32 %a, %b
  %e = select i1 %cmp31, <2 x i32> %c, <2 x i32> %d
  ret <2 x i32> %e
}

; <4 x i32> select on an i32 equality.
define <4 x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: test_select_cc_v4i32:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cmp31 = icmp eq i32 %a, %b
  %e = select i1 %cmp31, <4 x i32> %c, <4 x i32> %d
  ret <4 x i32> %e
}

; <1 x i64> select on an i64 equality: single lane, so no dup is expected.
define <1 x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1 x i64> %c, <1 x i64> %d) {
; CHECK-LABEL: test_select_cc_v1i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %cmp31 = icmp eq i64 %a, %b
  %e = select i1 %cmp31, <1 x i64> %c, <1 x i64> %d
  ret <1 x i64> %e
}

; <2 x i64> select on an i64 equality.
define <2 x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2 x i64> %c, <2 x i64> %d) {
; CHECK-LABEL: test_select_cc_v2i64:
; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cmp31 = icmp eq i64 %a, %b
  %e = select i1 %cmp31, <2 x i64> %c, <2 x i64> %d
  ret <2 x i64> %e
}

; <1 x float> select on an f32 ordered-equal compare: the compare mask goes
; straight into bsl.
define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d) {
; CHECK-LABEL: test_select_cc_v1f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
  ret <1 x float> %e
}

; <2 x float> select on an f32 ordered-equal compare.
define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) {
; CHECK-LABEL: test_select_cc_v2f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
  ret <2 x float> %e
}

; <4 x float> select on an f32 ordered-equal compare.
define <4 x float> @test_select_cc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32:
; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
  ret <4 x float> %e
}

; <4 x float> select whose condition is an integer (i32) equality.
define <4 x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: test_select_cc_v4f32_icmp:
; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
  %cmp31 = icmp eq i32 %a, %b
  %e = select i1 %cmp31, <4 x float> %c, <4 x float> %d
  ret <4 x float> %e
}

; <1 x double> select on an f64 ordered-equal compare: scalar mask, no dup.
define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64:
; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
  ret <1 x double> %e
}

; <1 x double> select whose condition is an integer (i64) equality.
define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d) {
; CHECK-LABEL: test_select_cc_v1f64_icmp:
; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
  %cmp31 = icmp eq i64 %a, %b
  %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
  ret <1 x double> %e
}

; <2 x double> select on an f64 ordered-equal compare.
define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) {
; CHECK-LABEL: test_select_cc_v2f64:
; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
  %cmp31 = fcmp oeq double %a, %b
  %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
  ret <2 x double> %e
}

; Special case: when the select condition is an icmp with i1 operands, don't
; do the comparison on vectors.
; Part of PR21549.
define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
; CHECK: tst w0, #0x1
; CHECK: csetm [[MASK:w[0-9]+]], ne
; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]]
; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
; CHECK: mov v0.16b, [[DUPMASK]].16b
  %cmp = icmp ne i1 %cc, 0
  %e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
  ret <2 x i32> %e
}

; Also make sure we support irregular/non-power-of-2 types such as v3f32.
; <3 x float> select driven by a scalar f32 ordered-equal compare. The expected
; sequence operates on .4s/.16b arrangements and must be exactly
; fcmeq, dup, bsl, mov, ret with nothing in between.
; The bsl line *uses* the DUPMASK capture from the dup line rather than
; re-defining it, so the test verifies that the broadcast mask is the register
; bsl actually consumes (bsl takes the mask in its destination register).
define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq float %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

; Same <3 x float> select, but the condition is an f64 ordered-equal compare,
; so the mask is produced and broadcast as .2d before the 16-byte bsl.
; As above, bsl must consume the register written by dup.
define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
; CHECK-NEXT: bsl [[DUPMASK]].16b, v0.16b, v1.16b
; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
; CHECK-NEXT: ret
  %cc = fcmp oeq double %c1, %c2
  %r = select i1 %cc, <3 x float> %a, <3 x float> %b
  ret <3 x float> %r
}

; Attribute group #0 is attached to both v3f32 functions above.
attributes #0 = { nounwind }