; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; NB: this tests vcnt, vclz, and vcls
;
; Layout: every test function loads one vector through its pointer argument,
; applies a single intrinsic, and returns the result. Names containing "Q"
; use 128-bit vector types and are expected to select the q-register form of
; the instruction; the others use 64-bit vector types and expect d registers.

; ---------------------------------------------------------------------------
; vcnt: per-element population count via llvm.ctpop
; ---------------------------------------------------------------------------

; 64-bit vector of i8 -> vcnt.8 on d registers.
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vcnt8:
;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
	ret <8 x i8> %tmp2
}

; 128-bit vector of i8 -> vcnt.8 on q registers.
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vcntQ8:
;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
	ret <16 x i8> %tmp2
}

declare <8 x i8>  @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone

; ---------------------------------------------------------------------------
; vclz: per-element count of leading zeros via llvm.ctlz
; The second intrinsic operand is passed as i1 0 in every call, i.e. the
; "zero input is undefined" flag described in the LLVM LangRef is off.
; ---------------------------------------------------------------------------

; 64-bit vector of i8 -> vclz.i8 on d registers.
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclz8:
;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
	ret <8 x i8> %tmp2
}

; 64-bit vector of i16 -> vclz.i16 on d registers.
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclz16:
;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
	ret <4 x i16> %tmp2
}

; 64-bit vector of i32 -> vclz.i32 on d registers.
define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclz32:
;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
	ret <2 x i32> %tmp2
}

; 128-bit vector of i8 -> vclz.i8 on q registers.
define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclzQ8:
;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
	ret <16 x i8> %tmp2
}

; 128-bit vector of i16 -> vclz.i16 on q registers.
define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclzQ16:
;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
	ret <8 x i16> %tmp2
}

; 128-bit vector of i32 -> vclz.i32 on q registers.
define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclzQ32:
;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
	ret <4 x i32> %tmp2
}

declare <8 x i8>  @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone

declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone

; ---------------------------------------------------------------------------
; vcls: ARM-specific intrinsic llvm.arm.neon.vcls.*
; The expectations below also pin the register class (d vs q), like the vcnt
; and vclz tests above. Matching the mnemonic alone would make each D/Q pair
; indistinguishable, so a 128-bit operation that was wrongly split into
; 64-bit halves would still pass.
; ---------------------------------------------------------------------------

; 64-bit vector of i8 -> vcls.s8 on d registers.
define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclss8:
;CHECK: vcls.s8 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
	ret <8 x i8> %tmp2
}

; 64-bit vector of i16 -> vcls.s16 on d registers.
define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclss16:
;CHECK: vcls.s16 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
	ret <4 x i16> %tmp2
}

; 64-bit vector of i32 -> vcls.s32 on d registers.
define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclss32:
;CHECK: vcls.s32 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
	ret <2 x i32> %tmp2
}

; 128-bit vector of i8 -> vcls.s8 on q registers.
define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclsQs8:
;CHECK: vcls.s8 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
	ret <16 x i8> %tmp2
}

; 128-bit vector of i16 -> vcls.s16 on q registers.
define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclsQs16:
;CHECK: vcls.s16 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
	ret <8 x i16> %tmp2
}

; 128-bit vector of i32 -> vcls.s32 on q registers.
define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclsQs32:
;CHECK: vcls.s32 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
	ret <4 x i32> %tmp2
}

; Prototypes for the ARM NEON vcls intrinsic, one per vector type.
; Grouped by element width; within each pair the 64-bit vector type comes
; first and the 128-bit vector type second.

; i8 elements
declare <8 x i8>  @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone

; i16 elements
declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone

; i32 elements
declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone