; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vcges8:
;CHECK: vcge.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vcges16:
;CHECK: vcge.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
  ret <4 x i16> %tmp4
}

define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vcges32:
;CHECK: vcge.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vcgeu8:
;CHECK: vcge.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vcgeu16:
;CHECK: vcge.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
  ret <4 x i16> %tmp4
}

define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vcgeu32:
;CHECK: vcge.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcgef32:
;CHECK: vcge.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
  ret <2 x i32> %tmp4
}

define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vcgeQs8:
;CHECK: vcge.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
  %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
  ret <16 x i8> %tmp4
}

define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vcgeQs16:
;CHECK: vcge.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vcgeQs32:
;CHECK: vcge.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vcgeQu8:
;CHECK: vcge.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
  %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
  ret <16 x i8> %tmp4
}

define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vcgeQu16:
;CHECK: vcge.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
  ret <8 x i16> %tmp4
}

define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vcgeQu32:
;CHECK: vcge.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vcgeQf32:
;CHECK: vcge.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
  ret <4 x i32> %tmp4
}

define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vacgef32:
;CHECK: vacge.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vacgeQf32:
;CHECK: vacge.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x i32> %tmp3
}

declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
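
; The comparisons against zero below should be selected to the immediate form
; of the instruction (vcge/vcle with #0); the CHECK-NOT lines verify that no
; vmov or vmvn is emitted to materialize the zero vector.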
define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
;CHECK: vcgei8Z:
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vcge.s8
  %tmp1 = load <8 x i8>* %A
  %tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
;CHECK: vclei8Z:
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vcle.s8
  %tmp1 = load <8 x i8>* %A
  %tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
  ret <8 x i8> %tmp4
}

; Radar 8782191
; Floating-point comparisons against zero produce results with integer
; elements, not floating-point elements.
define void @test_vclez_fp() nounwind optsize {
;CHECK: test_vclez_fp:
;CHECK: vcle.f32
entry:
  %0 = fcmp ole <4 x float> undef, zeroinitializer
  %1 = sext <4 x i1> %0 to <4 x i16>
  %2 = add <4 x i16> %1, zeroinitializer
  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %5 = trunc <8 x i16> %4 to <8 x i8>
  tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %5, i32 1)
  unreachable
}

declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind