; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Test case: a <4 x i32> is assembled from four volatile i32 loads of %in and
; is then used both as a whole vector (stored to %out0 and to %out1) and lane
; by lane (every extracted element is stored volatile to %out2).
; The directives below expect, in order: four buffer_load_dword, two
; buffer_store_dwordx4, and four buffer_store_dword.
; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0,
                                                    <4 x i32> addrspace(1)* noalias %out1,
                                                    i32 addrspace(1)* noalias %out2,
                                                    i32 addrspace(1)* %in) {
  ; Four separate volatile loads from the same address; volatile keeps them
  ; from being merged into one load.
  %elt0 = load volatile i32, i32 addrspace(1)* %in
  %elt1 = load volatile i32, i32 addrspace(1)* %in
  %elt2 = load volatile i32, i32 addrspace(1)* %in
  %elt3 = load volatile i32, i32 addrspace(1)* %in

  ; Assemble the vector one lane at a time.
  %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
  %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
  %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
  %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3

  ; Whole-vector uses: the same value goes to two destinations.
  store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0
  store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out1

  ; Per-lane uses of the same vector.
  %extract0 = extractelement <4 x i32> %vec3, i32 0
  %extract1 = extractelement <4 x i32> %vec3, i32 1
  %extract2 = extractelement <4 x i32> %vec3, i32 2
  %extract3 = extractelement <4 x i32> %vec3, i32 3

  ; Volatile so that each of the four scalar stores to %out2 is kept.
  store volatile i32 %extract0, i32 addrspace(1)* %out2
  store volatile i32 %extract1, i32 addrspace(1)* %out2
  store volatile i32 %extract2, i32 addrspace(1)* %out2
  store volatile i32 %extract3, i32 addrspace(1)* %out2

  ret void
}

; Test case: a <4 x i32> is assembled from four volatile i32 loads of %in.
; Each lane is extracted and fed through a different scalar ALU op
; (add / sub / xor / and), the whole vector is stored once to %out0, and the
; four scalar results are stored volatile to %out2. %out1 is not referenced
; by the body.
; The directives below expect, in order: four buffer_load_dword, one
; buffer_store_dwordx4, and four buffer_store_dword.
; GCN-LABEL: {{^}}store_build_vector_multiple_extract_uses_v4i32:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword

; GCN: buffer_store_dwordx4

; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
define amdgpu_kernel void @store_build_vector_multiple_extract_uses_v4i32(<4 x i32> addrspace(1)* noalias %out0,
                                                            <4 x i32> addrspace(1)* noalias %out1,
                                                            i32 addrspace(1)* noalias %out2,
                                                            i32 addrspace(1)* %in) {
  ; Four separate volatile loads from the same address.
  %elt0 = load volatile i32, i32 addrspace(1)* %in
  %elt1 = load volatile i32, i32 addrspace(1)* %in
  %elt2 = load volatile i32, i32 addrspace(1)* %in
  %elt3 = load volatile i32, i32 addrspace(1)* %in

  ; Assemble the vector one lane at a time.
  %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
  %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
  %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
  %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3

  ; Per-lane uses of the vector.
  %extract0 = extractelement <4 x i32> %vec3, i32 0
  %extract1 = extractelement <4 x i32> %vec3, i32 1
  %extract2 = extractelement <4 x i32> %vec3, i32 2
  %extract3 = extractelement <4 x i32> %vec3, i32 3

  ; A different scalar operation on each extracted lane.
  %op0 = add i32 %extract0, 3
  %op1 = sub i32 %extract1, 9
  %op2 = xor i32 %extract2, 1231412
  ; The mask was previously written as 258233412312, which does not fit in
  ; i32; 535374552 is that literal's low 32 bits (258233412312 - 60 * 2^32),
  ; spelled out so the operand is a valid i32 as written.
  %op3 = and i32 %extract3, 535374552

  ; Single whole-vector use.
  store <4 x i32> %vec3, <4 x i32> addrspace(1)* %out0

  ; Volatile so that each of the four scalar stores to %out2 is kept.
  store volatile i32 %op0, i32 addrspace(1)* %out2
  store volatile i32 %op1, i32 addrspace(1)* %out2
  store volatile i32 %op2, i32 addrspace(1)* %out2
  store volatile i32 %op3, i32 addrspace(1)* %out2

  ret void
}

; Test case: a <4 x i32> is assembled from four volatile i32 loads of %in and
; bitcast to <2 x i64>. The bitcast vector is stored whole to %out0, and each
; of its two i64 lanes is extracted and stored volatile to %out2. %out1 is
; not referenced by the body.
; The directives below expect, in order: four buffer_load_dword, one
; buffer_store_dwordx4, and two buffer_store_dwordx2.
; GCN-LABEL: {{^}}store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword

; GCN: buffer_store_dwordx4

; GCN: buffer_store_dwordx2
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @store_build_vector_multiple_uses_v4i32_bitcast_to_v2i64(<2 x i64> addrspace(1)* noalias %out0,
                                                                     <4 x i32> addrspace(1)* noalias %out1,
                                                                     i64 addrspace(1)* noalias %out2,
                                                                     i32 addrspace(1)* %in) {
  ; Four separate volatile loads from the same address.
  %elt0 = load volatile i32, i32 addrspace(1)* %in
  %elt1 = load volatile i32, i32 addrspace(1)* %in
  %elt2 = load volatile i32, i32 addrspace(1)* %in
  %elt3 = load volatile i32, i32 addrspace(1)* %in

  ; Assemble the vector one lane at a time.
  %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
  %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
  %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
  %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3

  ; Reinterpret the four 32-bit lanes as two 64-bit lanes; whole-vector use.
  %bc.vec3 = bitcast <4 x i32> %vec3 to <2 x i64>
  store <2 x i64> %bc.vec3, <2 x i64> addrspace(1)* %out0

  ; Per-lane uses of the bitcast vector.
  %extract0 = extractelement <2 x i64> %bc.vec3, i32 0
  %extract1 = extractelement <2 x i64> %bc.vec3, i32 1

  ; Volatile so that both i64 stores to %out2 are kept.
  store volatile i64 %extract0, i64 addrspace(1)* %out2
  store volatile i64 %extract1, i64 addrspace(1)* %out2

  ret void
}
