; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Exercises the fcmp predicates that have no directly matching NEON compare
; instruction. Each function loads two <2 x float> operands, compares them
; with one predicate, sign-extends the <2 x i1> result to <2 x i32>, and the
; CHECK lines pin the instruction sequence expected in the llc output.

; une: expect VCEQ followed by VMVN.
define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcunef32:
;CHECK: vceq.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp une <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; olt: expect a single VCGT (operands are not checked here).
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcoltf32:
;CHECK: vcgt.f32
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp olt <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ole: expect a single VCGE (operands are not checked here).
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcolef32:
;CHECK: vcge.f32
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp ole <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; uge: expect VCGT followed by VMVN.
define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp uge <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ule: expect VCGT followed by VMVN.
define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vculef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp ule <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ugt: expect VCGE followed by VMVN.
define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugtf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp ugt <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ult: expect VCGE followed by VMVN.
define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcultf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp ult <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ueq: expect two VCGTs, a VORR combining them, then VMVN.
define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcueqf32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp ueq <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; one: expect two VCGTs combined by a VORR.
define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vconef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp one <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; uno: expect VCGE, then VCGT, a VORR combining them, then VMVN.
define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcunof32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp uno <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}

; ord: expect VCGE, then VCGT, combined by a VORR.
define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcordf32:
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
  %lhs = load <2 x float>, <2 x float>* %A
  %rhs = load <2 x float>, <2 x float>* %B
  %cmp = fcmp ord <2 x float> %lhs, %rhs
  %mask = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %mask
}