; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

; Vector integer comparisons that have no one-to-one NEON instruction.
;   * not-equal (ne) is lowered to VCEQ followed by VMVN;
;   * less-than (lt/ult) and less-than-or-equal (le/ule) are lowered to
;     VCGT and VCGE with the two operands exchanged.
; Not-equal is exercised for every operand type; the remaining predicates
; are only sampled.
;
; Every function below loads two vectors through %A and %B, compares them
; lane by lane, sign-extends the i1 result vector back to the element width
; and returns it.

; ne on <8 x i8>: expects vceq.i8 with vmvn on the very next output line.
define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vcnei8:
;CHECK: vceq.i8
;CHECK-NEXT: vmvn
  %lhs = load <8 x i8>* %A
  %rhs = load <8 x i8>* %B
  %cmp = icmp ne <8 x i8> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %mask
}

; ne on <4 x i16>: expects vceq.i16 with vmvn on the very next output line.
define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vcnei16:
;CHECK: vceq.i16
;CHECK-NEXT: vmvn
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp ne <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

; ne on <2 x i32>: expects vceq.i32 with vmvn on the very next output line.
define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vcnei32:
;CHECK: vceq.i32
;CHECK-NEXT: vmvn
  %lhs = load <2 x i32>* %A
  %rhs = load <2 x i32>* %B
  %cmp = icmp ne <2 x i32> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ne on <16 x i8>: expects vceq.i8 with vmvn on the very next output line.
define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vcneQi8:
;CHECK: vceq.i8
;CHECK-NEXT: vmvn
  %lhs = load <16 x i8>* %A
  %rhs = load <16 x i8>* %B
  %cmp = icmp ne <16 x i8> %lhs, %rhs
  %mask = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %mask
}

; ne on <8 x i16>: expects vceq.i16 with vmvn on the very next output line.
define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vcneQi16:
;CHECK: vceq.i16
;CHECK-NEXT: vmvn
  %lhs = load <8 x i16>* %A
  %rhs = load <8 x i16>* %B
  %cmp = icmp ne <8 x i16> %lhs, %rhs
  %mask = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %mask
}

; ne on <4 x i32>: expects vceq.i32 with vmvn on the very next output line.
define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vcneQi32:
;CHECK: vceq.i32
;CHECK-NEXT: vmvn
  %lhs = load <4 x i32>* %A
  %rhs = load <4 x i32>* %B
  %cmp = icmp ne <4 x i32> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}

; Signed less-than on <16 x i8>: expects vcgt.s8.
define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: vcltQs8:
;CHECK: vcgt.s8
  %lhs = load <16 x i8>* %A
  %rhs = load <16 x i8>* %B
  %cmp = icmp slt <16 x i8> %lhs, %rhs
  %mask = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %mask
}

; Signed less-than-or-equal on <4 x i16>: expects vcge.s16.
define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vcles16:
;CHECK: vcge.s16
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp sle <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

; Unsigned less-than on <4 x i16>: expects vcgt.u16.
define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vcltu16:
;CHECK: vcgt.u16
  %lhs = load <4 x i16>* %A
  %rhs = load <4 x i16>* %B
  %cmp = icmp ult <4 x i16> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %mask
}

; Unsigned less-than-or-equal on <4 x i32>: expects vcge.u32.
define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: vcleQu32:
;CHECK: vcge.u32
  %lhs = load <4 x i32>* %A
  %rhs = load <4 x i32>* %B
  %cmp = icmp ule <4 x i32> %lhs, %rhs
  %mask = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %mask
}