; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s

; i8 argument with no extension attribute, widened by an explicit zext.
; Expected MIR: a 1-byte s8 load at offset 8 (HSA) or 44 (Mesa) from the
; incoming p4 pointer, then G_ZEXT to s32 before the 4-byte store through %out.
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
  ; HSA-VI-LABEL: name: i8_arg
  ; HSA-VI: bb.1 (%ir-block.0):
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: i8_arg
  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
  %ext = zext i8 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked zeroext, widened by an explicit zext.
; Expected MIR: the argument is still loaded as a 1-byte s8 value and widened
; with G_ZEXT; no extra instruction is expected for the attribute itself.
define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
  ; HSA-VI-LABEL: name: i8_zext_arg
  ; HSA-VI: bb.1 (%ir-block.0):
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg
  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
  %ext = zext i8 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked signext, widened by an explicit sext.
; Expected MIR: a 1-byte s8 load at offset 8 (HSA) or 44 (Mesa), then G_SEXT
; to s32 before the 4-byte store through %out.
define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
  ; HSA-VI-LABEL: name: i8_sext_arg
  ; HSA-VI: bb.1 (%ir-block.0):
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg
  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
  %ext = sext i8 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument with no extension attribute, widened by an explicit zext.
; Expected MIR: a 2-byte s16 load at offset 8 (HSA) or 44 (Mesa) from the
; incoming p4 pointer, then G_ZEXT to s32 before the 4-byte store through %out.
define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
  ; HSA-VI-LABEL: name: i16_arg
  ; HSA-VI: bb.1 (%ir-block.0):
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4)
  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: i16_arg
  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
  %ext = zext i16 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked zeroext, widened by an explicit zext.
; Expected MIR: the argument is still loaded as a 2-byte s16 value and widened
; with G_ZEXT; no extra instruction is expected for the attribute itself.
define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
  ; HSA-VI-LABEL: name: i16_zext_arg
  ; HSA-VI: bb.1 (%ir-block.0):
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4)
  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg
  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
  %ext = zext i16 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked signext, widened by an explicit sext.
; Expected MIR: a 2-byte s16 load at offset 8 (HSA) or 44 (Mesa), then G_SEXT
; to s32 before the 4-byte store through %out.
define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
  ; HSA-VI-LABEL: name: i16_sext_arg
  ; HSA-VI: bb.1 (%ir-block.0):
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4)
  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg
  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
  %ext = sext i16 %in to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i32 argument stored directly, with no extension step.
; Expected MIR: a 4-byte s32 load at offset 8 (HSA) or 44 (Mesa) from the
; incoming p4 pointer, fed straight into the 4-byte store through %out.
define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
  ; HSA-VI-LABEL: name: i32_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: i32_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; float argument stored directly.
; Expected MIR: the value is handled as a plain s32 -- a 4-byte load at
; offset 8 (HSA) or 44 (Mesa) followed by the 4-byte store through %out.
define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
  ; HSA-VI-LABEL: name: f32_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: f32_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; <2 x i8> argument stored directly (the IR store carries no explicit align).
; Expected MIR: a 2-byte <2 x s8> load at offset 8 (HSA) or 44 (Mesa) from the
; incoming p4 pointer, followed by a 2-byte store through %out.
define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
  ; HSA-VI-LABEL: name: v2i8_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store 2 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v2i8_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 2, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store 2 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; <2 x i16> argument stored directly (the IR store carries no explicit align).
; Expected MIR: a 4-byte <2 x s16> load at offset 8 (HSA) or 44 (Mesa) from the
; incoming p4 pointer, followed by a 4-byte store through %out.
define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
  ; HSA-VI-LABEL: name: v2i16_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v2i16_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; <2 x i32> argument stored with an explicit align 4.
; Expected MIR: an 8-byte <2 x s32> load at offset 8 (HSA) or 44 (Mesa) from
; the incoming p4 pointer, and an 8-byte store that keeps the align 4.
define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
  ; HSA-VI-LABEL: name: v2i32_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v2i32_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x float> argument stored with an explicit align 4.
; Expected MIR: the value is handled as <2 x s32> -- an 8-byte load at
; offset 8 (HSA) or 44 (Mesa) and an 8-byte store that keeps the align 4.
define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
  ; HSA-VI-LABEL: name: v2f32_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v2f32_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store 8 into %ir.out, align 4, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; <3 x i8> argument stored with an explicit align 4.
; Expected MIR: a 3-byte <3 x s8> load at offset 8 (HSA) or 44 (Mesa) from the
; incoming p4 pointer, and a 3-byte store that keeps the align 4.
define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
  ; HSA-VI-LABEL: name: v3i8_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 3, align 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store 3 into %ir.out, align 4, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v3i8_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 3, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store 3 into %ir.out, align 4, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; <3 x i16> argument stored with an explicit align 4.
; Expected MIR: a 6-byte <3 x s16> load at offset 8 (HSA) or 44 (Mesa) from the
; incoming p4 pointer, and a 6-byte store that keeps the align 4.
define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
  ; HSA-VI-LABEL: name: v3i16_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 6, align 8, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store 6 into %ir.out, align 4, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v3i16_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 6, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store 6 into %ir.out, align 4, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; <3 x i32> argument stored with an explicit align 4.
; Expected MIR: a 12-byte <3 x s32> load at offset 16 (HSA) or 52 (Mesa) --
; 8 bytes further on than the offsets used for the narrower second arguments
; in this file -- and a 12-byte store that keeps the align 4.
define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
  ; HSA-VI-LABEL: name: v3i32_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v3i32_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; <3 x float> argument stored with an explicit align 4.
; Expected MIR: the value is handled as <3 x s32> -- a 12-byte load at
; offset 16 (HSA) or 52 (Mesa) and a 12-byte store that keeps the align 4.
define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
  ; HSA-VI-LABEL: name: v3f32_arg
  ; HSA-VI: bb.1.entry:
  ; HSA-VI:   liveins: $sgpr4_sgpr5
  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4)
  ; HSA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1)
  ; HSA-VI:   S_ENDPGM 0
  ; LEGACY-MESA-VI-LABEL: name: v3f32_arg
  ; LEGACY-MESA-VI: bb.1.entry:
  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4)
  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store 12 into %ir.out, align 4, addrspace 1)
  ; LEGACY-MESA-VI:   S_ENDPGM 0
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; v4i8_arg: stores the <4 x i8> kernel argument %in to the addrspace(1)
; pointer %out (no explicit alignment on the IR store).
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); %in is a 4-byte <4 x s8> load.
497define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
498  ; HSA-VI-LABEL: name: v4i8_arg
499  ; HSA-VI: bb.1.entry:
500  ; HSA-VI:   liveins: $sgpr4_sgpr5
501  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
502  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
503  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
504  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
505  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
506  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
507  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4)
508  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
509  ; HSA-VI:   S_ENDPGM 0
510  ; LEGACY-MESA-VI-LABEL: name: v4i8_arg
511  ; LEGACY-MESA-VI: bb.1.entry:
512  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
513  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
514  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
515  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
516  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
517  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
518  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
519  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, addrspace 4)
520  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
521  ; LEGACY-MESA-VI:   S_ENDPGM 0
522entry:
523  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
524  ret void
525}
526
; v4i16_arg: stores the <4 x i16> kernel argument %in to the addrspace(1)
; pointer %out (no explicit alignment on the IR store).
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); %in is an 8-byte <4 x s16> load.
527define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
528  ; HSA-VI-LABEL: name: v4i16_arg
529  ; HSA-VI: bb.1.entry:
530  ; HSA-VI:   liveins: $sgpr4_sgpr5
531  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
532  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
533  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
534  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
535  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
536  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
537  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4)
538  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
539  ; HSA-VI:   S_ENDPGM 0
540  ; LEGACY-MESA-VI-LABEL: name: v4i16_arg
541  ; LEGACY-MESA-VI: bb.1.entry:
542  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
543  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
544  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
545  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
546  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
547  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
548  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
549  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
550  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
551  ; LEGACY-MESA-VI:   S_ENDPGM 0
552entry:
553  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
554  ret void
555}
556
; v4i32_arg: stores the <4 x i32> kernel argument %in to the addrspace(1)
; pointer %out with align 4.
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 16
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 52 from $sgpr0_sgpr1 (amdgcn-- triple); %in is a 16-byte <4 x s32> load.
557define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
558  ; HSA-VI-LABEL: name: v4i32_arg
559  ; HSA-VI: bb.1.entry:
560  ; HSA-VI:   liveins: $sgpr4_sgpr5
561  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
562  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
563  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
564  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
565  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
566  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
567  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4)
568  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1)
569  ; HSA-VI:   S_ENDPGM 0
570  ; LEGACY-MESA-VI-LABEL: name: v4i32_arg
571  ; LEGACY-MESA-VI: bb.1.entry:
572  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
573  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
574  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
575  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
576  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
577  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
578  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
579  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4)
580  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1)
581  ; LEGACY-MESA-VI:   S_ENDPGM 0
582entry:
583  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
584  ret void
585}
586
; v4f32_arg: stores the <4 x float> kernel argument %in to the addrspace(1)
; pointer %out with align 4.
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 16
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 52 from $sgpr0_sgpr1 (amdgcn-- triple); %in is loaded as <4 x s32>.
587define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
588  ; HSA-VI-LABEL: name: v4f32_arg
589  ; HSA-VI: bb.1.entry:
590  ; HSA-VI:   liveins: $sgpr4_sgpr5
591  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
592  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
593  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
594  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
595  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
596  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
597  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4)
598  ; HSA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1)
599  ; HSA-VI:   S_ENDPGM 0
600  ; LEGACY-MESA-VI-LABEL: name: v4f32_arg
601  ; LEGACY-MESA-VI: bb.1.entry:
602  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
603  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
604  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
605  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
606  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
607  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
608  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
609  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4)
610  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store 16 into %ir.out, align 4, addrspace 1)
611  ; LEGACY-MESA-VI:   S_ENDPGM 0
612entry:
613  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
614  ret void
615}
616
; v8i8_arg: stores the <8 x i8> kernel argument %in to the addrspace(1)
; pointer %out (no explicit alignment on the IR store).
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); %in is an 8-byte <8 x s8> load.
617define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
618  ; HSA-VI-LABEL: name: v8i8_arg
619  ; HSA-VI: bb.1.entry:
620  ; HSA-VI:   liveins: $sgpr4_sgpr5
621  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
622  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
623  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
624  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
625  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
626  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
627  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4)
628  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
629  ; HSA-VI:   S_ENDPGM 0
630  ; LEGACY-MESA-VI-LABEL: name: v8i8_arg
631  ; LEGACY-MESA-VI: bb.1.entry:
632  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
633  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
634  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
635  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
636  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
637  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
638  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
639  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
640  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
641  ; LEGACY-MESA-VI:   S_ENDPGM 0
642entry:
643  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
644  ret void
645}
646
; v8i16_arg: stores the <8 x i16> kernel argument %in to the addrspace(1)
; pointer %out (no explicit alignment on the IR store).
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 16
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 52 from $sgpr0_sgpr1 (amdgcn-- triple); %in is a 16-byte <8 x s16> load.
647define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
648  ; HSA-VI-LABEL: name: v8i16_arg
649  ; HSA-VI: bb.1.entry:
650  ; HSA-VI:   liveins: $sgpr4_sgpr5
651  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
652  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
653  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
654  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
655  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
656  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
657  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4)
658  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1)
659  ; HSA-VI:   S_ENDPGM 0
660  ; LEGACY-MESA-VI-LABEL: name: v8i16_arg
661  ; LEGACY-MESA-VI: bb.1.entry:
662  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
663  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
664  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
665  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
666  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
667  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
668  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
669  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4)
670  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1)
671  ; LEGACY-MESA-VI:   S_ENDPGM 0
672entry:
673  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
674  ret void
675}
676
; v8i32_arg: stores the <8 x i32> kernel argument %in to the addrspace(1)
; pointer %out with align 4.
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 32
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 68 from $sgpr0_sgpr1 (amdgcn-- triple); %in is a 32-byte <8 x s32> load.
677define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
678  ; HSA-VI-LABEL: name: v8i32_arg
679  ; HSA-VI: bb.1.entry:
680  ; HSA-VI:   liveins: $sgpr4_sgpr5
681  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
682  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
683  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
684  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
685  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
686  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
687  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4)
688  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1)
689  ; HSA-VI:   S_ENDPGM 0
690  ; LEGACY-MESA-VI-LABEL: name: v8i32_arg
691  ; LEGACY-MESA-VI: bb.1.entry:
692  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
693  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
694  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
695  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
696  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
697  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
698  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
699  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4)
700  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1)
701  ; LEGACY-MESA-VI:   S_ENDPGM 0
702entry:
703  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
704  ret void
705}
706
; v8f32_arg: stores the <8 x float> kernel argument %in to the addrspace(1)
; pointer %out with align 4.
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 32
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 68 from $sgpr0_sgpr1 (amdgcn-- triple); %in is loaded as <8 x s32>.
707define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
708  ; HSA-VI-LABEL: name: v8f32_arg
709  ; HSA-VI: bb.1.entry:
710  ; HSA-VI:   liveins: $sgpr4_sgpr5
711  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
712  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
713  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
714  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
715  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
716  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
717  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4)
718  ; HSA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1)
719  ; HSA-VI:   S_ENDPGM 0
720  ; LEGACY-MESA-VI-LABEL: name: v8f32_arg
721  ; LEGACY-MESA-VI: bb.1.entry:
722  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
723  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
724  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
725  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
726  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
727  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
728  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
729  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4)
730  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store 32 into %ir.out, align 4, addrspace 1)
731  ; LEGACY-MESA-VI:   S_ENDPGM 0
732entry:
733  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
734  ret void
735}
736
; v16i8_arg: stores the <16 x i8> kernel argument %in to the addrspace(1)
; pointer %out (no explicit alignment on the IR store).
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 16
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 52 from $sgpr0_sgpr1 (amdgcn-- triple); %in is a 16-byte <16 x s8> load.
737define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
738  ; HSA-VI-LABEL: name: v16i8_arg
739  ; HSA-VI: bb.1.entry:
740  ; HSA-VI:   liveins: $sgpr4_sgpr5
741  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
742  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
743  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
744  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
745  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
746  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
747  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, addrspace 4)
748  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1)
749  ; HSA-VI:   S_ENDPGM 0
750  ; LEGACY-MESA-VI-LABEL: name: v16i8_arg
751  ; LEGACY-MESA-VI: bb.1.entry:
752  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
753  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
754  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
755  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
756  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
757  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
758  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
759  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4)
760  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store 16 into %ir.out, addrspace 1)
761  ; LEGACY-MESA-VI:   S_ENDPGM 0
762entry:
763  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
764  ret void
765}
766
; v16i16_arg: stores the <16 x i16> kernel argument %in to the addrspace(1)
; pointer %out (no explicit alignment on the IR store).
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 32
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 68 from $sgpr0_sgpr1 (amdgcn-- triple); %in is a 32-byte <16 x s16> load.
767define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
768  ; HSA-VI-LABEL: name: v16i16_arg
769  ; HSA-VI: bb.1.entry:
770  ; HSA-VI:   liveins: $sgpr4_sgpr5
771  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
772  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
773  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
774  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
775  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
776  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
777  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 16, addrspace 4)
778  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store 32 into %ir.out, addrspace 1)
779  ; HSA-VI:   S_ENDPGM 0
780  ; LEGACY-MESA-VI-LABEL: name: v16i16_arg
781  ; LEGACY-MESA-VI: bb.1.entry:
782  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
783  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
784  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
785  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
786  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
787  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
788  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
789  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 32, align 4, addrspace 4)
790  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store 32 into %ir.out, addrspace 1)
791  ; LEGACY-MESA-VI:   S_ENDPGM 0
792entry:
793  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
794  ret void
795}
796
; v16i32_arg: stores the <16 x i32> kernel argument %in to the addrspace(1)
; pointer %out with align 4.
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 64
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 100 from $sgpr0_sgpr1 (amdgcn-- triple); %in is a 64-byte <16 x s32> load.
797define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
798  ; HSA-VI-LABEL: name: v16i32_arg
799  ; HSA-VI: bb.1.entry:
800  ; HSA-VI:   liveins: $sgpr4_sgpr5
801  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
802  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
803  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
804  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
805  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
806  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
807  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 16, addrspace 4)
808  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1)
809  ; HSA-VI:   S_ENDPGM 0
810  ; LEGACY-MESA-VI-LABEL: name: v16i32_arg
811  ; LEGACY-MESA-VI: bb.1.entry:
812  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
813  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
814  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
815  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
816  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
817  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
818  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
819  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 4, addrspace 4)
820  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1)
821  ; LEGACY-MESA-VI:   S_ENDPGM 0
822entry:
823  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
824  ret void
825}
826
; v16f32_arg: stores the <16 x float> kernel argument %in to the addrspace(1)
; pointer %out with align 4.
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 64
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 100 from $sgpr0_sgpr1 (amdgcn-- triple); %in is loaded as <16 x s32>.
827define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
828  ; HSA-VI-LABEL: name: v16f32_arg
829  ; HSA-VI: bb.1.entry:
830  ; HSA-VI:   liveins: $sgpr4_sgpr5
831  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
832  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
833  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
834  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
835  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
836  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
837  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 16, addrspace 4)
838  ; HSA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1)
839  ; HSA-VI:   S_ENDPGM 0
840  ; LEGACY-MESA-VI-LABEL: name: v16f32_arg
841  ; LEGACY-MESA-VI: bb.1.entry:
842  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
843  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
844  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
845  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
846  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
847  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
848  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
849  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 64, align 4, addrspace 4)
850  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store 64 into %ir.out, align 4, addrspace 1)
851  ; LEGACY-MESA-VI:   S_ENDPGM 0
852entry:
853  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
854  ret void
855}
856
; kernel_arg_i64: stores the i64 kernel argument %a to the addrspace(1)
; pointer %out with align 8. The body has no named entry block, so the
; expected MIR block is named after %ir-block.0.
; Expected irtranslator MIR: %out is loaded at offset 0 and %a at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); %a is an 8-byte s64 load.
857define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
858  ; HSA-VI-LABEL: name: kernel_arg_i64
859  ; HSA-VI: bb.1 (%ir-block.0):
860  ; HSA-VI:   liveins: $sgpr4_sgpr5
861  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
862  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
863  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
864  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
865  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
866  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
867  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4)
868  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
869  ; HSA-VI:   S_ENDPGM 0
870  ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64
871  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
872  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
873  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
874  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
875  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
876  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
877  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
878  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
879  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
880  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
881  ; LEGACY-MESA-VI:   S_ENDPGM 0
882  store i64 %a, i64 addrspace(1)* %out, align 8
883  ret void
884}
885
; f64_kernel_arg: stores the double kernel argument %in to the addrspace(1)
; pointer %out (no explicit alignment on the IR store).
; Expected irtranslator MIR: %out is loaded at offset 0 and %in at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); %in is loaded as s64.
886define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
887  ; HSA-VI-LABEL: name: f64_kernel_arg
888  ; HSA-VI: bb.1.entry:
889  ; HSA-VI:   liveins: $sgpr4_sgpr5
890  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
891  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
892  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
893  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
894  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
895  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
896  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, addrspace 4)
897  ; HSA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
898  ; HSA-VI:   S_ENDPGM 0
899  ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg
900  ; LEGACY-MESA-VI: bb.1.entry:
901  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
902  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
903  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
904  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
905  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
906  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
907  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
908  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
909  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
910  ; LEGACY-MESA-VI:   S_ENDPGM 0
911entry:
912  store double %in, double addrspace(1)* %out
913  ret void
914}
915
; i1_arg: stores the i1 kernel argument %x to the addrspace(1) pointer %out
; with align 1.
; Expected irtranslator MIR: %out is loaded at offset 0 and %x at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); %x is a 1-byte load typed s1
; and is stored back as s1 with a 1-byte store.
916define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
917  ; HSA-VI-LABEL: name: i1_arg
918  ; HSA-VI: bb.1 (%ir-block.0):
919  ; HSA-VI:   liveins: $sgpr4_sgpr5
920  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
921  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
922  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
923  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
924  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
925  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
926  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
927  ; HSA-VI:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store 1 into %ir.out, addrspace 1)
928  ; HSA-VI:   S_ENDPGM 0
929  ; LEGACY-MESA-VI-LABEL: name: i1_arg
930  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
931  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
932  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
933  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
934  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
935  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
936  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
937  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
938  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
939  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store 1 into %ir.out, addrspace 1)
940  ; LEGACY-MESA-VI:   S_ENDPGM 0
941  store i1 %x, i1 addrspace(1)* %out, align 1
942  ret void
943}
944
; i1_arg_zext_i32: zero-extends the i1 kernel argument %x to i32 and stores the
; result to the addrspace(1) pointer %out with align 4.
; Expected irtranslator MIR: %out is loaded at offset 0 and %x at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); the s1 load feeds a G_ZEXT to
; s32 ahead of the 4-byte store.
945define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
946  ; HSA-VI-LABEL: name: i1_arg_zext_i32
947  ; HSA-VI: bb.1 (%ir-block.0):
948  ; HSA-VI:   liveins: $sgpr4_sgpr5
949  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
950  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
951  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
952  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
953  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
954  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
955  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
956  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
957  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
958  ; HSA-VI:   S_ENDPGM 0
959  ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32
960  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
961  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
962  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
963  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
964  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
965  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
966  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
967  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
968  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
969  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
970  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
971  ; LEGACY-MESA-VI:   S_ENDPGM 0
972  %ext = zext i1 %x to i32
973  store i32 %ext, i32 addrspace(1)* %out, align 4
974  ret void
975}
976
; i1_arg_zext_i64: zero-extends the i1 kernel argument %x to i64 and stores the
; result to the addrspace(1) pointer %out with align 8.
; Expected irtranslator MIR: %out is loaded at offset 0 and %x at offset 8
; from the p4 pointer copied from $sgpr4_sgpr5 (amdhsa triple), or at offsets
; 36 and 44 from $sgpr0_sgpr1 (amdgcn-- triple); the s1 load feeds a G_ZEXT to
; s64 ahead of the 8-byte store.
977define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
978  ; HSA-VI-LABEL: name: i1_arg_zext_i64
979  ; HSA-VI: bb.1 (%ir-block.0):
980  ; HSA-VI:   liveins: $sgpr4_sgpr5
981  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
982  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
983  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
984  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
985  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
986  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
987  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
988  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
989  ; HSA-VI:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
990  ; HSA-VI:   S_ENDPGM 0
991  ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64
992  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
993  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
994  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
995  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
996  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
997  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
998  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
999  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1000  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
1001  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
1002  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
1003  ; LEGACY-MESA-VI:   S_ENDPGM 0
1004  %ext = zext i1 %x to i64
1005  store i64 %ext, i64 addrspace(1)* %out, align 8
1006  ret void
1007}
1008
; i1 kernel argument sign-extended to i32 and stored through %out.
; The checks expect %x to be loaded as s1 (1 byte) from the p4 base plus
; offset 8 (HSA) or 44 (legacy Mesa) and widened to s32 with G_SEXT.
1009define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
1010  ; HSA-VI-LABEL: name: i1_arg_sext_i32
1011  ; HSA-VI: bb.1 (%ir-block.0):
1012  ; HSA-VI:   liveins: $sgpr4_sgpr5
1013  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1014  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1015  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1016  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1017  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1018  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1019  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
1020  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1021  ; HSA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1022  ; HSA-VI:   S_ENDPGM 0
1023  ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32
1024  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1025  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1026  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1027  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1028  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1029  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1030  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1031  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1032  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
1033  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1034  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1035  ; LEGACY-MESA-VI:   S_ENDPGM 0
1036  %ext = sext i1 %x to i32
1037  store i32 %ext, i32 addrspace(1)* %out, align 4
1038  ret void
1039}
1040
; i1 kernel argument sign-extended to i64 and stored through %out.
; The checks expect %x to be loaded as s1 (1 byte) from the p4 base plus
; offset 8 (HSA) or 44 (legacy Mesa) and widened to s64 with G_SEXT.
1041define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
1042  ; HSA-VI-LABEL: name: i1_arg_sext_i64
1043  ; HSA-VI: bb.1 (%ir-block.0):
1044  ; HSA-VI:   liveins: $sgpr4_sgpr5
1045  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1046  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1047  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1048  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1049  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1050  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1051  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 8, addrspace 4)
1052  ; HSA-VI:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1053  ; HSA-VI:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
1054  ; HSA-VI:   S_ENDPGM 0
1055  ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64
1056  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1057  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1058  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1059  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1060  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1061  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1062  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1063  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1064  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
1065  ; LEGACY-MESA-VI:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1066  ; LEGACY-MESA-VI:   G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store 8 into %ir.out, addrspace 1)
1067  ; LEGACY-MESA-VI:   S_ENDPGM 0
1068  %ext = sext i1 %x to i64
1069  store i64 %ext, i64 addrspace(1)* %out, align 8
1070  ret void
1071}
1072
1073; 0-sized arguments do not add a slot to the argument register set, so
1074; they waste an index in the virtual register array.
; The checks contain no load for the empty-struct %arg0; %arg1 is loaded
; from the first slot, at offset 0 (HSA) or 36 (legacy Mesa), and stored
; to an undef global pointer.
1075define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
1076  ; HSA-VI-LABEL: name: empty_struct_arg
1077  ; HSA-VI: bb.1 (%ir-block.0):
1078  ; HSA-VI:   liveins: $sgpr4_sgpr5
1079  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1080  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1081  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1082  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
1083  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1084  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1085  ; HSA-VI:   S_ENDPGM 0
1086  ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg
1087  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1088  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1089  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1090  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1091  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1092  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
1093  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1094  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1095  ; LEGACY-MESA-VI:   S_ENDPGM 0
1096  store i32 %arg1, i32 addrspace(1)* undef
1097  ret void
1098}
1099
; Zero-length array variant of the empty-aggregate case: the checks contain
; no load for the [0 x i8] %arg0, and %arg1 is loaded from offset 0 (HSA) or
; 36 (legacy Mesa) before being stored to an undef global pointer.
1100define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
1101  ; HSA-VI-LABEL: name: empty_array_arg
1102  ; HSA-VI: bb.1 (%ir-block.0):
1103  ; HSA-VI:   liveins: $sgpr4_sgpr5
1104  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1105  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1106  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1107  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
1108  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1109  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1110  ; HSA-VI:   S_ENDPGM 0
1111  ; LEGACY-MESA-VI-LABEL: name: empty_array_arg
1112  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1113  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1114  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1115  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1116  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1117  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
1118  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1119  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1120  ; LEGACY-MESA-VI:   S_ENDPGM 0
1121  store i32 %arg1, i32 addrspace(1)* undef
1122  ret void
1123}
1124
1125; The correct load offsets for these:
1126; load 4 from 0,
1127; load 8 from 8,
1128; load 4 from 24,
1129; load 8 from 32
1130
1131; With the SelectionDAG argument lowering, the alignments of the
1132; struct members are not properly considered, making these wrong.
; Expected lowering per the checks: each {i32, i64} argument is loaded as a
; single s128 and split with G_EXTRACT at bit offsets 0 and 64. The first
; struct is loaded at byte offset 0 (HSA) / 36 (legacy Mesa), %pad at 16 / 52
; and the second struct at 24 / 60. All five values are stored with volatile
; stores to null global pointers.
1133define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) {
1134  ; HSA-VI-LABEL: name: struct_argument_alignment
1135  ; HSA-VI: bb.1 (%ir-block.0):
1136  ; HSA-VI:   liveins: $sgpr4_sgpr5
1137  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1138  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1139  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1140  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 16, addrspace 4)
1141  ; HSA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
1142  ; HSA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
1143  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1144  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1145  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 16, addrspace 4)
1146  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
1147  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1148  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 16, align 8, addrspace 4)
1149  ; HSA-VI:   [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
1150  ; HSA-VI:   [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
1151  ; HSA-VI:   [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1152  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
1153  ; HSA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
1154  ; HSA-VI:   G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1155  ; HSA-VI:   G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1156  ; HSA-VI:   G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1)
1157  ; HSA-VI:   G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1158  ; HSA-VI:   G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1159  ; HSA-VI:   S_ENDPGM 0
1160  ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
1161  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1162  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1163  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1164  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1165  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1166  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4)
1167  ; LEGACY-MESA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
1168  ; LEGACY-MESA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
1169  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1170  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1171  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
1172  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
1173  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1174  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 16, align 4, addrspace 4)
1175  ; LEGACY-MESA-VI:   [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
1176  ; LEGACY-MESA-VI:   [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
1177  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1178  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
1179  ; LEGACY-MESA-VI:   [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
1180  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1181  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1182  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1)
1183  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1184  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1185  ; LEGACY-MESA-VI:   S_ENDPGM 0
1186  %val0 = extractvalue {i32, i64} %arg0, 0
1187  %val1 = extractvalue {i32, i64} %arg0, 1
1188  %val2 = extractvalue {i32, i64} %arg1, 0
1189  %val3 = extractvalue {i32, i64} %arg1, 1
1190  store volatile i32 %val0, i32 addrspace(1)* null
1191  store volatile i64 %val1, i64 addrspace(1)* null
1192  store volatile i8 %pad, i8 addrspace(1)* null
1193  store volatile i32 %val2, i32 addrspace(1)* null
1194  store volatile i64 %val3, i64 addrspace(1)* null
1195  ret void
1196}
1197
1198; No padding between i8 and next struct, but round up at end to 4 byte
1199; multiple.
; Expected lowering per the checks: each packed <{i32, i64}> argument is
; loaded as a single s96 and split with G_EXTRACT at bit offsets 0 and 32.
; The second struct is loaded at byte offset 13 (HSA) / 49 (legacy Mesa) with
; align 1. The unnamed i8 argument is never used and has no load in the checks.
1200define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
1201  ; HSA-VI-LABEL: name: packed_struct_argument_alignment
1202  ; HSA-VI: bb.1 (%ir-block.1):
1203  ; HSA-VI:   liveins: $sgpr4_sgpr5
1204  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1205  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1206  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1207  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4)
1208  ; HSA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
1209  ; HSA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
1210  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
1211  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1212  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
1213  ; HSA-VI:   [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
1214  ; HSA-VI:   [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
1215  ; HSA-VI:   [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1216  ; HSA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
1217  ; HSA-VI:   G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1218  ; HSA-VI:   G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1219  ; HSA-VI:   G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1220  ; HSA-VI:   G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1221  ; HSA-VI:   S_ENDPGM 0
1222  ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
1223  ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1224  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1225  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1226  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1227  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1228  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4)
1229  ; LEGACY-MESA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
1230  ; LEGACY-MESA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
1231  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
1232  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1233  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
1234  ; LEGACY-MESA-VI:   [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s96), 0
1235  ; LEGACY-MESA-VI:   [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD1]](s96), 32
1236  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1237  ; LEGACY-MESA-VI:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
1238  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1239  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1240  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT2]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
1241  ; LEGACY-MESA-VI:   G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
1242  ; LEGACY-MESA-VI:   S_ENDPGM 0
1243  %val0 = extractvalue <{i32, i64}> %arg0, 0
1244  %val1 = extractvalue <{i32, i64}> %arg0, 1
1245  %val2 = extractvalue <{i32, i64}> %arg1, 0
1246  %val3 = extractvalue <{i32, i64}> %arg1, 1
1247  store volatile i32 %val0, i32 addrspace(1)* null
1248  store volatile i64 %val1, i64 addrspace(1)* null
1249  store volatile i32 %val2, i32 addrspace(1)* null
1250  store volatile i64 %val3, i64 addrspace(1)* null
1251  ret void
1252}
1253
; None of the three arguments is used by the body, so the checks expect only
; the COPY of the live-in SGPR pair followed by S_ENDPGM, with no argument
; loads. The block is named, hence the `bb.1.entry` label in the checks.
1254define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind {
1255  ; HSA-VI-LABEL: name: unused_i32_arg
1256  ; HSA-VI: bb.1.entry:
1257  ; HSA-VI:   liveins: $sgpr4_sgpr5
1258  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1259  ; HSA-VI:   S_ENDPGM 0
1260  ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg
1261  ; LEGACY-MESA-VI: bb.1.entry:
1262  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1263  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1264  ; LEGACY-MESA-VI:   S_ENDPGM 0
1265entry:
1266  ret void
1267}
1268
1269; Byref pointers should only be treated as offsets from kernarg
; byref(i8) case: the checks expect the IR load through %in.byref to become a
; 1-byte G_LOAD straight from base + 8 (HSA) / 44 (legacy Mesa). Its memory
; operand names %ir.in.byref and, unlike the %out load, is not `invariant`.
1270define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) {
1271  ; HSA-VI-LABEL: name: byref_constant_i8_arg
1272  ; HSA-VI: bb.1 (%ir-block.0):
1273  ; HSA-VI:   liveins: $sgpr4_sgpr5
1274  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1275  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1276  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1277  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1278  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1279  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1280  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4)
1281  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1282  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1283  ; HSA-VI:   S_ENDPGM 0
1284  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg
1285  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1286  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1287  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1288  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1289  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1290  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1291  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1292  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1293  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 1 from %ir.in.byref, addrspace 4)
1294  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1295  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1296  ; LEGACY-MESA-VI:   S_ENDPGM 0
1297  %in = load i8, i8 addrspace(4)* %in.byref
1298  %ext = zext i8 %in to i32
1299  store i32 %ext, i32 addrspace(1)* %out, align 4
1300  ret void
1301}
1302
; byref(i16) align 2 case: the checks expect the IR load through %in.byref to
; become a 2-byte G_LOAD straight from base + 8 (HSA) / 44 (legacy Mesa),
; zero-extended to s32 and stored through %out.
1303define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) align 2 %in.byref) {
1304  ; HSA-VI-LABEL: name: byref_constant_i16_arg
1305  ; HSA-VI: bb.1 (%ir-block.0):
1306  ; HSA-VI:   liveins: $sgpr4_sgpr5
1307  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1308  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1309  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1310  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1311  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1312  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1313  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4)
1314  ; HSA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1315  ; HSA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1316  ; HSA-VI:   S_ENDPGM 0
1317  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg
1318  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1319  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1320  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1321  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1322  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1323  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1324  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1325  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1326  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 2 from %ir.in.byref, addrspace 4)
1327  ; LEGACY-MESA-VI:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1328  ; LEGACY-MESA-VI:   G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1329  ; LEGACY-MESA-VI:   S_ENDPGM 0
1330  %in = load i16, i16 addrspace(4)* %in.byref
1331  %ext = zext i16 %in to i32
1332  store i32 %ext, i32 addrspace(1)* %out, align 4
1333  ret void
1334}
1335
; byref(i32) align 4 followed by a plain i32: per the checks the byref address
; is base + 8 (HSA) / 44 (legacy Mesa) and %after.offset is loaded from
; 12 / 48. The byref G_PTR_ADD is emitted alongside the argument loads, while
; the load through it comes afterwards, following the %after.offset load.
1336define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align 4 %in.byref, i32 %after.offset) {
1337  ; HSA-VI-LABEL: name: byref_constant_i32_arg
1338  ; HSA-VI: bb.1 (%ir-block.0):
1339  ; HSA-VI:   liveins: $sgpr4_sgpr5
1340  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1341  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1342  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1343  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1344  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1345  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1346  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1347  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1348  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
1349  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
1350  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1351  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1352  ; HSA-VI:   S_ENDPGM 0
1353  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg
1354  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1355  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1356  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1357  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1358  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1359  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1360  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1361  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1362  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1363  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1364  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
1365  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
1366  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1367  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1368  ; LEGACY-MESA-VI:   S_ENDPGM 0
1369  %in = load i32, i32 addrspace(4)* %in.byref
1370  store volatile i32 %in, i32 addrspace(1)* %out, align 4
1371  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1372  ret void
1373}
1374
; byref(<4 x i32>) align(16) followed by a plain i32: per the checks the byref
; address is base + 16 (HSA) / 52 (legacy Mesa) and %after.offset is loaded
; from 32 / 68. Both values are stored with volatile stores through the same
; loaded %out pointer; the i32 store goes through a bitcast (%out.cast).
1375define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) {
1376  ; HSA-VI-LABEL: name: byref_constant_v4i32_arg
1377  ; HSA-VI: bb.1 (%ir-block.0):
1378  ; HSA-VI:   liveins: $sgpr4_sgpr5
1379  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1380  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1381  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1382  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1383  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1384  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1385  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1386  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1387  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
1388  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4)
1389  ; HSA-VI:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1)
1390  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1)
1391  ; HSA-VI:   S_ENDPGM 0
1392  ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg
1393  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1394  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1395  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1396  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1397  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1398  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1399  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1400  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1401  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1402  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1403  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
1404  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 16 from %ir.in.byref, addrspace 4)
1405  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store 16 into %ir.out, align 4, addrspace 1)
1406  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out.cast, addrspace 1)
1407  ; LEGACY-MESA-VI:   S_ENDPGM 0
1408  %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref
1409  store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
1410  %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)*
1411  store volatile i32 %after.offset, i32 addrspace(1)* %out.cast, align 4
1412  ret void
1413}
1414
; byref(i32) with an explicit align(256), larger than the type needs: per the
; checks the byref address is base + 256 (HSA) / 292 (legacy Mesa) and
; %after.offset is loaded from 260 / 296, i.e. directly after the 4-byte value.
1415define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) {
1416  ; HSA-VI-LABEL: name: byref_align_constant_i32_arg
1417  ; HSA-VI: bb.1 (%ir-block.0):
1418  ; HSA-VI:   liveins: $sgpr4_sgpr5
1419  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1420  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1421  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1422  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1423  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
1424  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1425  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
1426  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1427  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
1428  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
1429  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1430  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1431  ; HSA-VI:   S_ENDPGM 0
1432  ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg
1433  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1434  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1435  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1436  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1437  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1438  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1439  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292
1440  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1441  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
1442  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1443  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 8, addrspace 4)
1444  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
1445  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1446  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1447  ; LEGACY-MESA-VI:   S_ENDPGM 0
1448  %in = load i32, i32 addrspace(4)* %in.byref
1449  store volatile i32 %in, i32 addrspace(1)* %out, align 4
1450  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1451  ret void
1452}
1453
; Kernel taking a byref(<16 x i32>) align(64) pointer in the constant address
; space (4), placed after %out and an unnamed i8 and followed by the i32
; %after.offset. The expected MIR forms the byref pointer with a G_PTR_ADD from
; the kernarg base at offset 64 (amdhsa run) / 100 (legacy Mesa run), uses that
; p4 pointer directly as the address of the <16 x s32> load, and loads
; %after.offset from offset 128 / 164. Both stores are volatile in the IR and
; in the expected memory operands.
1454define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
1455  ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1456  ; HSA-VI: bb.1 (%ir-block.1):
1457  ; HSA-VI:   liveins: $sgpr4_sgpr5
1458  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1459  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1460  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1461  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1462  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
1463  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1464  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
1465  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1466  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
1467  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4)
1468  ; HSA-VI:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1)
1469  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1470  ; HSA-VI:   S_ENDPGM 0
1471  ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1472  ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1473  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1474  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1475  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1476  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1477  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1478  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
1479  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1480  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
1481  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1482  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 4, addrspace 4)
1483  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 64 from %ir.in.byref, addrspace 4)
1484  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store 64 into %ir.cast.out, align 4, addrspace 1)
1485  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1486  ; LEGACY-MESA-VI:   S_ENDPGM 0
1487  %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref
1488  %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)*
1489  store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4
1490  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1491  ret void
1492}
1493
1494; Also accept byref kernel arguments with other global address spaces.
; Kernel taking a byref(i32) pointer in the global address space (1). The
; expected MIR computes the p4 kernarg address of the argument at offset 8
; (amdhsa run) / 44 (legacy Mesa run), converts it to p1 with a
; G_ADDRSPACE_CAST, and loads through the cast result. The load's memory
; operand names the IR pointer %in.byref, matching the other non-constant
; address space byref tests in this file.
1495define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) align(4) %in.byref) {
1496  ; HSA-VI-LABEL: name: byref_global_i32_arg
1497  ; HSA-VI: bb.1 (%ir-block.0):
1498  ; HSA-VI:   liveins: $sgpr4_sgpr5
1499  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1500  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1501  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1502  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1503  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1504  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1505  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1507  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable load 4 from %ir.in.byref, addrspace 1)
1508  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1509  ; HSA-VI:   S_ENDPGM 0
1510  ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg
1511  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1512  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1513  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1514  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1515  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1516  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1517  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1518  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1519  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1521  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable load 4 from %ir.in.byref, addrspace 1)
1522  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1523  ; LEGACY-MESA-VI:   S_ENDPGM 0
1524  %in = load i32, i32 addrspace(1)* %in.byref
1525  store i32 %in, i32 addrspace(1)* %out, align 4
1526  ret void
1527}
1528
; Kernel taking a byref(i32) pointer in the default address space (plain
; `i32*`). The expected MIR computes the p4 kernarg address of the argument at
; offset 8 (amdhsa run) / 44 (legacy Mesa run), converts it to p0 with a
; G_ADDRSPACE_CAST, and loads the i32 through the cast result; the load's
; memory operand carries no addrspace annotation.
1529define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) align(4) %in.byref) {
1530  ; HSA-VI-LABEL: name: byref_flat_i32_arg
1531  ; HSA-VI: bb.1 (%ir-block.0):
1532  ; HSA-VI:   liveins: $sgpr4_sgpr5
1533  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1534  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1535  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1536  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1537  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1538  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1539  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1540  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref)
1541  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1542  ; HSA-VI:   S_ENDPGM 0
1543  ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg
1544  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1545  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1546  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1547  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1548  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1549  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1550  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1551  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1552  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1553  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load 4 from %ir.in.byref)
1554  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1555  ; LEGACY-MESA-VI:   S_ENDPGM 0
1556  %in = load i32, i32* %in.byref
1557  store i32 %in, i32 addrspace(1)* %out, align 4
1558  ret void
1559}
1560
; Kernel taking a byref(i32) pointer in address space 6. The expected MIR
; computes the p4 kernarg address of the argument at offset 8 (amdhsa run) /
; 44 (legacy Mesa run), converts it to p6 with a G_ADDRSPACE_CAST, and loads
; the i32 through the cast result with an addrspace 6 memory operand.
1561define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) align(4) %in.byref) {
1562  ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg
1563  ; HSA-VI: bb.1 (%ir-block.0):
1564  ; HSA-VI:   liveins: $sgpr4_sgpr5
1565  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1566  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1567  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1568  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1569  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1570  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1571  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1572  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6)
1573  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1574  ; HSA-VI:   S_ENDPGM 0
1575  ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg
1576  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1577  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1578  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1579  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1580  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1581  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1582  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1583  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1584  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1585  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load 4 from %ir.in.byref, addrspace 6)
1586  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1587  ; LEGACY-MESA-VI:   S_ENDPGM 0
1588  %in = load i32, i32 addrspace(6)* %in.byref
1589  store i32 %in, i32 addrspace(1)* %out, align 4
1590  ret void
1591}
1592
; Kernel taking a byref(i32) pointer in address space 999. The expected MIR
; computes the p4 kernarg address of the argument at offset 8 (amdhsa run) /
; 44 (legacy Mesa run), converts it to p999 with a G_ADDRSPACE_CAST, and loads
; the i32 through the cast result with an addrspace 999 memory operand.
1593define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) align(4) %in.byref) {
1594  ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg
1595  ; HSA-VI: bb.1 (%ir-block.0):
1596  ; HSA-VI:   liveins: $sgpr4_sgpr5
1597  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1598  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1599  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1600  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1601  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1602  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1603  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1604  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999)
1605  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1606  ; HSA-VI:   S_ENDPGM 0
1607  ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg
1608  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1609  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1610  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1611  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1612  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1613  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1614  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1615  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1616  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1617  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load 4 from %ir.in.byref, addrspace 999)
1618  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1619  ; LEGACY-MESA-VI:   S_ENDPGM 0
1620  %in = load i32, i32 addrspace(999)* %in.byref
1621  store i32 %in, i32 addrspace(1)* %out, align 4
1622  ret void
1623}
1624
1625; Invalid, but should not crash.
; Kernel taking a byref(i32) pointer in address space 3. The expected MIR
; computes the p4 kernarg address of the argument at offset 8 (amdhsa run) /
; 44 (legacy Mesa run), converts it to p3 with a G_ADDRSPACE_CAST, and loads
; the i32 through the cast result with an addrspace 3 memory operand.
1626define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) align(4) %in.byref) {
1627  ; HSA-VI-LABEL: name: byref_local_i32_arg
1628  ; HSA-VI: bb.1 (%ir-block.0):
1629  ; HSA-VI:   liveins: $sgpr4_sgpr5
1630  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1631  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1632  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1633  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1634  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1635  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1636  ; HSA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1637  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3)
1638  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1639  ; HSA-VI:   S_ENDPGM 0
1640  ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg
1641  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1642  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1643  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1644  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1645  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1646  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1647  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1648  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1649  ; LEGACY-MESA-VI:   [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1650  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load 4 from %ir.in.byref, addrspace 3)
1651  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store 4 into %ir.out, addrspace 1)
1652  ; LEGACY-MESA-VI:   S_ENDPGM 0
1653  %in = load i32, i32 addrspace(3)* %in.byref
1654  store i32 %in, i32 addrspace(1)* %out, align 4
1655  ret void
1656}
1657
; Kernel taking two consecutive byref(i32) align(4) pointers in the constant
; address space (4), followed by the i32 %after.offset. The expected MIR forms
; the two byref pointers with G_PTR_ADDs at kernarg offsets 8 and 12 (amdhsa
; run) / 44 and 48 (legacy Mesa run) and loads %after.offset from offset 16 /
; 52. The three volatile stores to %out are expected in IR order: %in0, %in1,
; then %after.offset.
1658define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(4) %in0.byref, i32 addrspace(4)* byref(i32) align(4) %in1.byref, i32 %after.offset) {
1659  ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg
1660  ; HSA-VI: bb.1 (%ir-block.0):
1661  ; HSA-VI:   liveins: $sgpr4_sgpr5
1662  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1663  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1664  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1665  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 16, addrspace 4)
1666  ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1667  ; HSA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1668  ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1669  ; HSA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1670  ; HSA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1671  ; HSA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1672  ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, align 16, addrspace 4)
1673  ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4)
1674  ; HSA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4)
1675  ; HSA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1676  ; HSA-VI:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1677  ; HSA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1678  ; HSA-VI:   S_ENDPGM 0
1679  ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg
1680  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1681  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1682  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1683  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1684  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1685  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 8, align 4, addrspace 4)
1686  ; LEGACY-MESA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1687  ; LEGACY-MESA-VI:   [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1688  ; LEGACY-MESA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1689  ; LEGACY-MESA-VI:   [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1690  ; LEGACY-MESA-VI:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1691  ; LEGACY-MESA-VI:   [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1692  ; LEGACY-MESA-VI:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load 4, addrspace 4)
1693  ; LEGACY-MESA-VI:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load 4 from %ir.in0.byref, addrspace 4)
1694  ; LEGACY-MESA-VI:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable load 4 from %ir.in1.byref, addrspace 4)
1695  ; LEGACY-MESA-VI:   G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1696  ; LEGACY-MESA-VI:   G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1697  ; LEGACY-MESA-VI:   G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store 4 into %ir.out, addrspace 1)
1698  ; LEGACY-MESA-VI:   S_ENDPGM 0
1699  %in0 = load i32, i32 addrspace(4)* %in0.byref
1700  %in1 = load i32, i32 addrspace(4)* %in1.byref
1701  store volatile i32 %in0, i32 addrspace(1)* %out, align 4
1702  store volatile i32 %in1, i32 addrspace(1)* %out, align 4
1703  store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4
1704  ret void
1705}
1706
; Kernel whose only argument is a byref(i32) align(4) pointer in the constant
; address space (4). The expected MIR forms the byref pointer with a G_PTR_ADD
; at kernarg offset 0 (amdhsa run) / 36 (legacy Mesa run) and loads the i32
; through it. The value is stored to an undef global pointer, which the checks
; expect to appear as a p1 G_IMPLICIT_DEF.
1707define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) align(4) %in.byref) {
1708  ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0
1709  ; HSA-VI: bb.1 (%ir-block.0):
1710  ; HSA-VI:   liveins: $sgpr4_sgpr5
1711  ; HSA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1712  ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1713  ; HSA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1714  ; HSA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1715  ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
1716  ; HSA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1717  ; HSA-VI:   S_ENDPGM 0
1718  ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0
1719  ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1720  ; LEGACY-MESA-VI:   liveins: $sgpr0_sgpr1
1721  ; LEGACY-MESA-VI:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1722  ; LEGACY-MESA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1723  ; LEGACY-MESA-VI:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1724  ; LEGACY-MESA-VI:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1725  ; LEGACY-MESA-VI:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load 4 from %ir.in.byref, addrspace 4)
1726  ; LEGACY-MESA-VI:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
1727  ; LEGACY-MESA-VI:   S_ENDPGM 0
1728  %in = load i32, i32 addrspace(4)* %in.byref
1729  store i32 %in, i32 addrspace(1)* undef, align 4
1730  ret void
1731}
1732