; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX

; Pattern under test: two elements are extracted from one vector, each is
; compared against a constant with the same predicate, and the two i1 results
; are joined by a binary operator. Where the transform is expected to fire, the
; assertions require a single vector compare against a constant vector, a
; shuffle that lines up the two lanes, a vector binop, and one final extract.
; The sse2 and avx2 runs expect different results for some functions
; (presumably a target cost-model decision - confirm against the pass).

; fcmp + and on both lanes of <2 x double>.
; Expected to stay scalar with sse2 and to be vectorized with avx2.

define i1 @fcmp_and_v2f64(<2 x double> %a) {
; SSE-LABEL: @fcmp_and_v2f64(
; SSE-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SSE-NEXT:    [[E2:%.*]] = extractelement <2 x double> [[A]], i32 1
; SSE-NEXT:    [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
; SSE-NEXT:    [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
; SSE-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @fcmp_and_v2f64(
; AVX-NEXT:    [[TMP1:%.*]] = fcmp olt <2 x double> [[A:%.*]], <double 4.200000e+01, double -8.000000e+00>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
; AVX-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <2 x double> %a, i32 0
  %e2 = extractelement <2 x double> %a, i32 1
  %cmp1 = fcmp olt double %e1, 42.0
  %cmp2 = fcmp olt double %e2, -8.0
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; fcmp + or on lanes 0 and 2 of <4 x double>. The two extracts deliberately use
; different index types (i32 and i64). Unused lanes of the constant vector are
; expected to be undef.

define i1 @fcmp_or_v4f64(<4 x double> %a) {
; SSE-LABEL: @fcmp_or_v4f64(
; SSE-NEXT:    [[E1:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SSE-NEXT:    [[E2:%.*]] = extractelement <4 x double> [[A]], i64 2
; SSE-NEXT:    [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
; SSE-NEXT:    [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
; SSE-NEXT:    [[R:%.*]] = or i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @fcmp_or_v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x double> [[A:%.*]], <double 4.200000e+01, double undef, double -8.000000e+00, double undef>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; AVX-NEXT:    [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x double> %a, i32 0
  %e2 = extractelement <4 x double> %a, i64 2
  %cmp1 = fcmp olt double %e1, 42.0
  %cmp2 = fcmp olt double %e2, -8.0
  %r = or i1 %cmp1, %cmp2
  ret i1 %r
}

; icmp + xor on lanes 3 and 1 of <4 x i32>. Both runs share the same
; expectations here: lane 3 is shuffled into lane 1 and the result is
; extracted from lane 1.

define i1 @icmp_xor_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @icmp_xor_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 undef, i32 -8, i32 undef, i32 42>
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 3
  %e2 = extractelement <4 x i32> %a, i32 1
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  ret i1 %r
}

; add is not canonical (should be xor), but that is ok.

define i1 @icmp_add_v8i32(<8 x i32> %a) {
; SSE-LABEL: @icmp_add_v8i32(
; SSE-NEXT:    [[E1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 7
; SSE-NEXT:    [[E2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; SSE-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[E1]], 42
; SSE-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[E2]], -8
; SSE-NEXT:    [[R:%.*]] = add i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @icmp_add_v8i32(
; AVX-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 undef, i32 undef, i32 -8, i32 undef, i32 undef, i32 undef, i32 undef, i32 42>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT:    [[TMP2:%.*]] = add <8 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <8 x i32> %a, i32 7
  %e2 = extractelement <8 x i32> %a, i32 2
  %cmp1 = icmp eq i32 %e1, 42
  %cmp2 = icmp eq i32 %e2, -8
  %r = add i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - this could CSE/simplify.
; Both extracts read lane 2, and the output is expected to be unchanged.

define i1 @same_extract_index(<4 x i32> %a) {
; CHECK-LABEL: @same_extract_index(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 2
  %e2 = extractelement <4 x i32> %a, i32 2
  %cmp1 = icmp ugt i32 %e1, 42
  %cmp2 = icmp ugt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - need identical predicates.
; One compare is sgt and the other is ugt.

define i1 @different_preds(<4 x i32> %a) {
; CHECK-LABEL: @different_preds(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 1
  %e2 = extractelement <4 x i32> %a, i32 2
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp ugt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - need 1 source vector.
; The extracts read from two different arguments, %a and %b.

define i1 @different_source_vec(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @different_source_vec(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 1
  %e2 = extractelement <4 x i32> %b, i32 2
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - don't try this with scalable vectors.
; Same shape as @icmp_xor_v4i32, but the source is <vscale x 4 x i32>.

define i1 @scalable(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @scalable(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 3
; CHECK-NEXT:    [[E2:%.*]] = extractelement <vscale x 4 x i32> [[A]], i32 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <vscale x 4 x i32> %a, i32 3
  %e2 = extractelement <vscale x 4 x i32> %a, i32 1
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  ret i1 %r
}