// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +dotprod \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -instcombine | FileCheck %s

// REQUIRES: aarch64-registered-target

// Test AArch64 Armv8.2-A dot product intrinsics

#include <arm_neon.h>

// vdot_u32: the FileCheck directives below expect the intrinsic to become a
// single call to @llvm.aarch64.neon.udot.v2i32.v8i8 that takes %a, %b and %c
// directly, with the call's result being returned.
uint32x2_t test_vdot_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) {
// CHECK-LABEL: define <2 x i32> @test_vdot_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <2 x i32> [[RESULT]]
  return vdot_u32(a, b, c);
}

// vdotq_u32: the FileCheck directives below expect the intrinsic to become a
// single call to @llvm.aarch64.neon.udot.v4i32.v16i8 that takes %a, %b and %c
// directly, with the call's result being returned.
uint32x4_t test_vdotq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) {
// CHECK-LABEL: define <4 x i32> @test_vdotq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <4 x i32> [[RESULT]]
  return vdotq_u32(a, b, c);
}

// vdot_s32: the FileCheck directives below expect the intrinsic to become a
// single call to @llvm.aarch64.neon.sdot.v2i32.v8i8 that takes %a, %b and %c
// directly, with the call's result being returned.
int32x2_t test_vdot_s32(int32x2_t a, int8x8_t b, int8x8_t c) {
// CHECK-LABEL: define <2 x i32> @test_vdot_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: ret <2 x i32> [[RESULT]]
  return vdot_s32(a, b, c);
}

// vdotq_s32: the FileCheck directives below expect the intrinsic to become a
// single call to @llvm.aarch64.neon.sdot.v4i32.v16i8 that takes %a, %b and %c
// directly, with the call's result being returned.
int32x4_t test_vdotq_s32(int32x4_t a, int8x16_t b, int8x16_t c) {
// CHECK-LABEL: define <4 x i32> @test_vdotq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: ret <4 x i32> [[RESULT]]
  return vdotq_s32(a, b, c);
}

// vdot_lane_u32 with lane index 1: the FileCheck directives below expect %c to
// be reinterpreted as <2 x i32>, element 1 replicated into both lanes by a
// shufflevector, the result cast back to <8 x i8>, and that value passed as the
// third operand of @llvm.aarch64.neon.udot.v2i32.v8i8.
uint32x2_t test_vdot_lane_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) {
// CHECK-LABEL: define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
// CHECK: ret <2 x i32> [[RESULT]]
  return vdot_lane_u32(a, b, c, 1);
}

// vdotq_lane_u32 with lane index 1: the FileCheck directives below expect the
// 64-bit %c to be reinterpreted as <2 x i32>, element 1 replicated into all
// four lanes of a <4 x i32> by a shufflevector, the result cast to <16 x i8>,
// and that value passed as the third operand of
// @llvm.aarch64.neon.udot.v4i32.v16i8.
uint32x4_t test_vdotq_lane_u32(uint32x4_t a, uint8x16_t b, uint8x8_t c) {
// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
// CHECK: ret <4 x i32> [[RESULT]]
  return vdotq_lane_u32(a, b, c, 1);
}

// vdot_laneq_u32 with lane index 1: the FileCheck directives below expect the
// 128-bit %c to be reinterpreted as <4 x i32>, element 1 replicated into both
// lanes of a <2 x i32> by a shufflevector, the result cast to <8 x i8>, and
// that value passed as the third operand of
// @llvm.aarch64.neon.udot.v2i32.v8i8.
uint32x2_t test_vdot_laneq_u32(uint32x2_t a, uint8x8_t b, uint8x16_t c) {
// CHECK-LABEL: define <2 x i32> @test_vdot_laneq_u32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <2 x i32> <i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
// CHECK: ret <2 x i32> [[RESULT]]
  return vdot_laneq_u32(a, b, c, 1);
}

// vdotq_laneq_u32 with lane index 1: the FileCheck directives below expect %c
// to be reinterpreted as <4 x i32>, element 1 replicated into all four lanes by
// a shufflevector, the result cast back to <16 x i8>, and that value passed as
// the third operand of @llvm.aarch64.neon.udot.v4i32.v16i8.
uint32x4_t test_vdotq_laneq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) {
// CHECK-LABEL: define <4 x i32> @test_vdotq_laneq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
// CHECK: ret <4 x i32> [[RESULT]]
  return vdotq_laneq_u32(a, b, c, 1);
}

// vdot_lane_s32 with lane index 1: the FileCheck directives below expect %c to
// be reinterpreted as <2 x i32>, element 1 replicated into both lanes by a
// shufflevector, the result cast back to <8 x i8>, and that value passed as the
// third operand of @llvm.aarch64.neon.sdot.v2i32.v8i8.
int32x2_t test_vdot_lane_s32(int32x2_t a, int8x8_t b, int8x8_t c) {
// CHECK-LABEL: define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
// CHECK: ret <2 x i32> [[RESULT]]
  return vdot_lane_s32(a, b, c, 1);
}

// vdotq_lane_s32 with lane index 1: the FileCheck directives below expect the
// 64-bit %c to be reinterpreted as <2 x i32>, element 1 replicated into all
// four lanes of a <4 x i32> by a shufflevector, the result cast to <16 x i8>,
// and that value passed as the third operand of
// @llvm.aarch64.neon.sdot.v4i32.v16i8.
int32x4_t test_vdotq_lane_s32(int32x4_t a, int8x16_t b, int8x8_t c) {
// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
// CHECK: ret <4 x i32> [[RESULT]]
  return vdotq_lane_s32(a, b, c, 1);
}

// vdot_laneq_s32 with lane index 1: the FileCheck directives below expect the
// 128-bit %c to be reinterpreted as <4 x i32>, element 1 replicated into both
// lanes of a <2 x i32> by a shufflevector, the result cast to <8 x i8>, and
// that value passed as the third operand of
// @llvm.aarch64.neon.sdot.v2i32.v8i8.
int32x2_t test_vdot_laneq_s32(int32x2_t a, int8x8_t b, int8x16_t c) {
// CHECK-LABEL: define <2 x i32> @test_vdot_laneq_s32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <2 x i32> <i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8>
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]])
// CHECK: ret <2 x i32> [[RESULT]]
  return vdot_laneq_s32(a, b, c, 1);
}

// vdotq_laneq_s32 with lane index 1: the FileCheck directives below expect %c
// to be reinterpreted as <4 x i32>, element 1 replicated into all four lanes by
// a shufflevector, the result cast back to <16 x i8>, and that value passed as
// the third operand of @llvm.aarch64.neon.sdot.v4i32.v16i8.
int32x4_t test_vdotq_laneq_s32(int32x4_t a, int8x16_t b, int8x16_t c) {
// CHECK-LABEL: define <4 x i32> @test_vdotq_laneq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32>
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8>
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]])
// CHECK: ret <4 x i32> [[RESULT]]
  return vdotq_laneq_s32(a, b, c, 1);
}
