; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -asm-verbose=0 -mtriple=aarch64-none-eabi -mattr=+bf16 | FileCheck %s

; Each function below is preceded by the C-level source form it corresponds
; to, followed by a short note on what the IR does and what output is expected.

; bfloat16x4_t test_vcreate_bf16(uint64_t a) { return vcreate_bf16(a); }
; Reinterprets the 64-bit integer argument as four bfloat lanes; the expected
; output is a single fmov from x0 to d0.
define <4 x bfloat> @test_vcreate_bf16(i64 %a) nounwind {
; CHECK-LABEL: test_vcreate_bf16:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %as_vec = bitcast i64 %a to <4 x bfloat>
  ret <4 x bfloat> %as_vec
}

; bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) { return vdup_n_bf16(v); }
; Inserts the scalar into lane 0 and broadcasts it with an all-zero mask.
define <4 x bfloat> @test_vdup_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdup_n_bf16:
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
  %seed = insertelement <4 x bfloat> undef, bfloat %v, i32 0
  %splat = shufflevector <4 x bfloat> %seed, <4 x bfloat> undef, <4 x i32> zeroinitializer
  ret <4 x bfloat> %splat
}

; bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) { return vdupq_n_bf16(v); }
; Eight-lane variant of the scalar broadcast above.
define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdupq_n_bf16:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %seed = insertelement <8 x bfloat> undef, bfloat %v, i32 0
  %splat = shufflevector <8 x bfloat> %seed, <8 x bfloat> undef, <8 x i32> zeroinitializer
  ret <8 x bfloat> %splat
}

; bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) { return vdup_lane_bf16(v, 1); }
; Broadcasts lane 1 of a four-lane vector into a four-lane result.
define <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_lane_bf16:
; CHECK-NEXT:    dup v0.4h, v0.h[1]
; CHECK-NEXT:    ret
  %splat = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x bfloat> %splat
}

; bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) { return vdupq_lane_bf16(v, 1); }
; Broadcasts lane 1 of a four-lane vector into an eight-lane result.
define <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_lane_bf16:
; CHECK-NEXT:    dup v0.8h, v0.h[1]
; CHECK-NEXT:    ret
  %splat = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x bfloat> %splat
}

; bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) { return vdup_laneq_bf16(v, 7); }
; Broadcasts lane 7 of an eight-lane vector into a four-lane result.
define <4 x bfloat> @test_vdup_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_laneq_bf16:
; CHECK-NEXT:    dup v0.4h, v0.h[7]
; CHECK-NEXT:    ret
  %splat = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  ret <4 x bfloat> %splat
}

; bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) { return vdupq_laneq_bf16(v, 7); }
; Broadcasts lane 7 of an eight-lane vector into an eight-lane result.
define <8 x bfloat> @test_vdupq_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_laneq_bf16:
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    ret
  %splat = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x bfloat> %splat
}

; bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) { return vcombine_bf16(low, high); }
; Concatenates the two four-lane inputs (identity mask 0..7); the expected
; output moves the first doubleword of v1 into the upper doubleword of v0.
define <8 x bfloat> @test_vcombine_bf16(<4 x bfloat> %low, <4 x bfloat> %high) nounwind {
; CHECK-LABEL: test_vcombine_bf16:
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %joined = shufflevector <4 x bfloat> %low, <4 x bfloat> %high, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %joined
}

; bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) { return vget_high_bf16(a); }
; Selects lanes 4..7; the expected output is an ext by 8 bytes.
define <4 x bfloat> @test_vget_high_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_high_bf16:
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    ret
  %upper = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x bfloat> %upper
}

; bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) { return vget_low_bf16(a); }
; Selects lanes 0..3; the expected output contains nothing but the return.
define <4 x bfloat> @test_vget_low_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_low_bf16:
; CHECK-NEXT:    ret
  %lower = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x bfloat> %lower
}

; bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) { return vget_lane_bf16(v, 1); }
; Extracts lane 1 of a four-lane vector as a scalar.
define bfloat @test_vget_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vget_lane_bf16:
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
  %elt = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %elt
}

; bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) { return vgetq_lane_bf16(v, 7); }
; Extracts lane 7 of an eight-lane vector as a scalar.
define bfloat @test_vgetq_lane_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vgetq_lane_bf16:
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
  %elt = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %elt
}

; bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) { return vset_lane_bf16(a, v, 1); }
; Writes the scalar into lane 1 of the vector. In the expected output the
; insert is performed on v1 and the result is then copied into v0.
define <4 x bfloat> @test_vset_lane_bf16(bfloat %a, <4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vset_lane_bf16:
; CHECK-NEXT:    mov v1.h[1], v0.h[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %updated = insertelement <4 x bfloat> %v, bfloat %a, i32 1
  ret <4 x bfloat> %updated
}

; bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) { return vsetq_lane_bf16(a, v, 7); }
; Writes the scalar into lane 7 of the vector. In the expected output the
; insert is performed on v1 and the result is then copied into v0.
define <8 x bfloat> @test_vsetq_lane_bf16(bfloat %a, <8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vsetq_lane_bf16:
; CHECK-NEXT:    mov v1.h[7], v0.h[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %updated = insertelement <8 x bfloat> %v, bfloat %a, i32 7
  ret <8 x bfloat> %updated
}

; bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { return vduph_lane_bf16(v, 1); }
; Same IR shape as test_vget_lane_bf16: a scalar extract of lane 1.
define bfloat @test_vduph_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_lane_bf16:
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
  %elt = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %elt
}

; bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { return vduph_laneq_bf16(v, 7); }
; Same IR shape as test_vgetq_lane_bf16: a scalar extract of lane 7.
define bfloat @test_vduph_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_laneq_bf16:
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
  %elt = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %elt
}

; vcopy_lane_bf16(a, 1, b, 3);
; Mask index 7 picks lane 3 of %b (second operand) for result lane 1.
define <4 x bfloat> @test_vcopy_lane_bf16_v1(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v1:
; CHECK-NEXT:    mov v0.h[1], v1.h[3]
; CHECK-NEXT:    ret
  %merged = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
  ret <4 x bfloat> %merged
}

; vcopy_lane_bf16(a, 2, b, 0);
; Mask index 4 picks lane 0 of %b (second operand) for result lane 2.
define <4 x bfloat> @test_vcopy_lane_bf16_v2(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v2:
; CHECK-NEXT:    mov v0.h[2], v1.h[0]
; CHECK-NEXT:    ret
  %merged = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  ret <4 x bfloat> %merged
}

; vcopyq_lane_bf16(a, 0, b, 2);
; %b is first widened to eight lanes keeping only its lane 2 (in position 2);
; mask index 10 then selects that widened lane for result lane 0.
define <8 x bfloat> @test_vcopyq_lane_bf16_v1(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v1:
; CHECK-NEXT:    mov v0.h[0], v1.h[2]
; CHECK-NEXT:    ret
  %b_wide = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b_wide, <8 x i32> <i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %merged
}

; vcopyq_lane_bf16(a, 6, b, 0);
; %b is first widened to eight lanes keeping only its lane 0 (in position 0);
; mask index 8 then selects that widened lane for result lane 6.
define <8 x bfloat> @test_vcopyq_lane_bf16_v2(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v2:
; CHECK-NEXT:    mov v0.h[6], v1.h[0]
; CHECK-NEXT:    ret
  %b_wide = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b_wide, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7>
  ret <8 x bfloat> %merged
}

; vcopy_laneq_bf16(a, 0, b, 7);
; Expressed as extract (lane 7 of %b) followed by insert (lane 0 of %a).
define <4 x bfloat> @test_vcopy_laneq_bf16_v1(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v1:
; CHECK-NEXT:    mov v0.h[0], v1.h[7]
; CHECK-NEXT:    ret
  %picked = extractelement <8 x bfloat> %b, i32 7
  %merged = insertelement <4 x bfloat> %a, bfloat %picked, i32 0
  ret <4 x bfloat> %merged
}

; vcopy_laneq_bf16(a, 3, b, 4);
; Expressed as extract (lane 4 of %b) followed by insert (lane 3 of %a).
define <4 x bfloat> @test_vcopy_laneq_bf16_v2(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v2:
; CHECK-NEXT:    mov v0.h[3], v1.h[4]
; CHECK-NEXT:    ret
  %picked = extractelement <8 x bfloat> %b, i32 4
  %merged = insertelement <4 x bfloat> %a, bfloat %picked, i32 3
  ret <4 x bfloat> %merged
}

; vcopyq_laneq_bf16(a, 3, b, 7);
; Mask index 15 picks lane 7 of %b (second operand) for result lane 3.
define <8 x bfloat> @test_vcopyq_laneq_bf16_v1(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v1:
; CHECK-NEXT:    mov v0.h[3], v1.h[7]
; CHECK-NEXT:    ret
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %merged
}

; vcopyq_laneq_bf16(a, 6, b, 2);
; Mask index 10 picks lane 2 of %b (second operand) for result lane 6.
define <8 x bfloat> @test_vcopyq_laneq_bf16_v2(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v2:
; CHECK-NEXT:    mov v0.h[6], v1.h[2]
; CHECK-NEXT:    ret
  %merged = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 10, i32 7>
  ret <8 x bfloat> %merged
}