; RUN: opt < %s -instcombine -S | FileCheck %s

; Verifies that instcombine replaces calls to the x86 XOP vpcom* comparison
; intrinsics with generic IR:
;   * lt/le/gt/ge/eq/ne predicates become an icmp with the matching predicate
;     followed by a sext of the <N x i1> result back to the vector type;
;   * the always-true predicates become an all-ones constant vector;
;   * the always-false predicates become zeroinitializer.
; The signed and unsigned eq/ne intrinsic variants are both expected to map
; onto the same sign-agnostic icmp eq / icmp ne.

; --- less-than / less-or-equal, <2 x i64> ---

define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64
; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64
; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64
; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64
; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b
; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %2
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

; --- greater-than / greater-or-equal, <4 x i32> ---

define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32
; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32
; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32
; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32
; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b
; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %2
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

; --- equal / not-equal, <8 x i16> ---
; Signed and unsigned intrinsic variants are expected to yield the same icmp.

define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16
; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16
; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16
; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16
; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
; CHECK-NEXT: ret <8 x i16> %2
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

; --- always-true / always-false, <16 x i8> ---
; These are expected to fold to constants with no remaining instructions
; other than the return.

define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8
; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8
; CHECK-NEXT: ret <16 x i8> zeroinitializer
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8
; CHECK-NEXT: ret <16 x i8> zeroinitializer
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

; Intrinsic declarations, grouped by predicate. Within each group the signed
; forms come first, then the unsigned ("u"-infixed) forms.

declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone