; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

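; For reference (not part of the autogenerated checks): AMDGPU inline operands
; cover the integers -16..64 and the floats 0.0, +/-0.5, +/-1.0, +/-2.0,
; +/-4.0, plus 1/(2*pi) on VI and newer. Anything outside that set costs a
; 32-bit literal or has to be materialized separately, which is what these
; tests exercise.
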
; Use a 64-bit value with lo bits that can be represented as an inline constant
define amdgpu_kernel void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
; SI-LABEL: i64_imm_inline_lo:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 5
; SI-NEXT:    v_mov_b32_e32 v1, 0x12345678
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: i64_imm_inline_lo:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 5
; VI-NEXT:    v_mov_b32_e32 v1, 0x12345678
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
  ret void
}

; Use a 64-bit value with hi bits that can be represented as an inline constant
define amdgpu_kernel void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
; SI-LABEL: i64_imm_inline_hi:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x12345678
; SI-NEXT:    v_mov_b32_e32 v1, 5
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: i64_imm_inline_hi:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x12345678
; VI-NEXT:    v_mov_b32_e32 v1, 5
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
  ret void
}

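; The sign-bit pattern 0x80000000 is built as a bit-reverse of the inline
; constant 1 (v_bfrev_b32), which is cheaper than a 32-bit literal.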
define amdgpu_kernel void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
; SI-LABEL: store_imm_neg_0.0_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_bfrev_b32_e32 v1, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_imm_neg_0.0_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_bfrev_b32_e32 v1, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store i64 -9223372036854775808, i64 addrspace(1) *%out
  ret void
}

define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_neg_0.0_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_bfrev_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_neg_0.0_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_bfrev_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store i32 -2147483648, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_0.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_0.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_imm_neg_0.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_bfrev_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_imm_neg_0.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_bfrev_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -0.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0.5
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0.5
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0.5, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -0.5
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -0.5
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -0.5, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 1.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -1.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -1.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -1.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 2.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 2.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 2.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -2.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -2.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -2.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 4.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 4.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 4.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -4.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -4.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -4.0, float addrspace(1)* %out
  ret void
}

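; 1/(2*pi) is an inline constant only on VI, so SI must use the 0x3e22f983 literal.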
define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_inv_2pi_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x3e22f983
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_inv_2pi_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0.15915494
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0x3FC45F3060000000, float addrspace(1)* %out
  ret void
}

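; The negated value -1/(2*pi) has no inline encoding, so both targets use the
; 0xbe22f983 literal.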
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_inv_2pi_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0xbe22f983
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_inv_2pi_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0xbe22f983
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0xBFC45F3060000000, float addrspace(1)* %out
  ret void
}

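; 4096.0 (0x45800000) is not an inline constant, so it is emitted as a literal.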
define amdgpu_kernel void @store_literal_imm_f32(float addrspace(1)* %out) {
; SI-LABEL: store_literal_imm_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x45800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_literal_imm_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x45800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 4096.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_0.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 0.5
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 0.5
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0.5
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -0.5
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -0.5
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -0.5
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 1.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 1.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 1.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -1.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -1.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -1.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 2.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 2.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 2.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -2.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -2.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -2.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 4.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 4.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 4.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -4.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -4.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -4.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; SI-LABEL: commute_add_inline_imm_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s7
; SI-NEXT:    s_mov_b32 s6, s2
; SI-NEXT:    s_mov_b32 s7, s3
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: commute_add_inline_imm_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, 0.5, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load float, float addrspace(1)* %in
  %y = fadd float %x, 0.5
  store float %y, float addrspace(1)* %out
  ret void
}

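; 1024.0 (0x44800000) needs a literal, which the VOP2 encoding only accepts in
; src0, so the operands are commuted to put the constant first.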
define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; SI-LABEL: commute_add_literal_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s7
; SI-NEXT:    s_mov_b32 s6, s2
; SI-NEXT:    s_mov_b32 s7, s3
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: commute_add_literal_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load float, float addrspace(1)* %in
  %y = fadd float %x, 1024.0
  store float %y, float addrspace(1)* %out
  ret void
}

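; The f64 constants below are exactly representable as f32 denormals whose bit
; patterns are the integers 1, 2, 16, 63 and 64, so the integer inline
; constants can encode them directly.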
define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_1_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_1_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36a0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_2_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 2
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_2_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 2
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36b0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_16_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 16
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_16_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 16
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36e0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

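; Adding a negative inline integer to the bitcast value stays on the scalar
; unit as an s_add_i32.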
define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_1_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_1_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -1
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_2_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -2
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_2_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -2
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -2
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_16_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -16
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_16_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -16
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -16
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_63_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 63
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_63_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 63
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36ff800000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_64_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 64
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_64_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 64
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x3700000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

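; f64 variants: v_add_f64 reads the operand straight from an SGPR pair. The
; [8 x i32] padding argument appears intended to push %x to a separate kernarg
; offset (0x13 on SI, 0x4c on VI).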
define amdgpu_kernel void @add_inline_imm_0.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_0.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_0.5_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 0.5
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.5_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 0.5
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0.5
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_neg_0.5_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], -0.5
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_0.5_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], -0.5
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, -0.5
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_1.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 1.0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_1.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 1.0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 1.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_neg_1.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], -1.0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_1.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], -1.0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, -1.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_2.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 2.0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_2.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 2.0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 2.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_neg_2.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], -2.0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_2.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], -2.0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, -2.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_4.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 4.0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_4.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 4.0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 4.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_neg_4.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], -4.0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_4.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], -4.0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, -4.0
  store double %y, double addrspace(1)* %out
  ret void
}

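; The f64 1/(2*pi) inline constant is only available on VI; SI has to build
; the value in a VGPR pair first.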
define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_inv_2pi_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    v_mov_b32_e32 v0, 0x6dc9c882
; SI-NEXT:    v_mov_b32_e32 v1, 0x3fc45f30
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], v[0:1]
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_inv_2pi_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 0.15915494309189532
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0x3fc45f306dc9c882
  store double %y, double addrspace(1)* %out
  ret void
}

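; -1/(2*pi) has no inline encoding for f64, so both targets materialize it in
; a VGPR pair.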
define amdgpu_kernel void @add_m_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_m_inv_2pi_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    v_mov_b32_e32 v0, 0x6dc9c882
; SI-NEXT:    v_mov_b32_e32 v1, 0xbfc45f30
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], v[0:1]
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_m_inv_2pi_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    v_mov_b32_e32 v0, 0x6dc9c882
; VI-NEXT:    v_mov_b32_e32 v1, 0xbfc45f30
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], v[0:1]
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0xbfc45f306dc9c882
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_1_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 1
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_1_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 1
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0x0000000000000001
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_2_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 2
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_2_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 2
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0x0000000000000002
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_16_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 16
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_16_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 16
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0x0000000000000010
  store double %y, double addrspace(1)* %out
  ret void
}

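; The all-ones and similar bit patterns below are NaNs, so the fadds fold away
; and the constants themselves are stored.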
1362define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1363; SI-LABEL: add_inline_imm_neg_1_f64:
1364; SI:       ; %bb.0:
1365; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1366; SI-NEXT:    v_mov_b32_e32 v0, -1
1367; SI-NEXT:    s_mov_b32 s3, 0xf000
1368; SI-NEXT:    s_mov_b32 s2, -1
1369; SI-NEXT:    v_mov_b32_e32 v1, v0
1370; SI-NEXT:    s_waitcnt lgkmcnt(0)
1371; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1372; SI-NEXT:    s_endpgm
1373;
1374; VI-LABEL: add_inline_imm_neg_1_f64:
1375; VI:       ; %bb.0:
1376; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1377; VI-NEXT:    v_mov_b32_e32 v0, -1
1378; VI-NEXT:    s_mov_b32 s3, 0xf000
1379; VI-NEXT:    s_mov_b32 s2, -1
1380; VI-NEXT:    v_mov_b32_e32 v1, v0
1381; VI-NEXT:    s_waitcnt lgkmcnt(0)
1382; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1383; VI-NEXT:    s_endpgm
1384  %y = fadd double %x, 0xffffffffffffffff
1385  store double %y, double addrspace(1)* %out
1386  ret void
1387}
1388
1389define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1390; SI-LABEL: add_inline_imm_neg_2_f64:
1391; SI:       ; %bb.0:
1392; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1393; SI-NEXT:    s_mov_b32 s3, 0xf000
1394; SI-NEXT:    s_mov_b32 s2, -1
1395; SI-NEXT:    v_mov_b32_e32 v0, -2
1396; SI-NEXT:    v_mov_b32_e32 v1, -1
1397; SI-NEXT:    s_waitcnt lgkmcnt(0)
1398; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1399; SI-NEXT:    s_endpgm
1400;
1401; VI-LABEL: add_inline_imm_neg_2_f64:
1402; VI:       ; %bb.0:
1403; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1404; VI-NEXT:    s_mov_b32 s3, 0xf000
1405; VI-NEXT:    s_mov_b32 s2, -1
1406; VI-NEXT:    v_mov_b32_e32 v0, -2
1407; VI-NEXT:    v_mov_b32_e32 v1, -1
1408; VI-NEXT:    s_waitcnt lgkmcnt(0)
1409; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1410; VI-NEXT:    s_endpgm
1411  %y = fadd double %x, 0xfffffffffffffffe
1412  store double %y, double addrspace(1)* %out
1413  ret void
1414}
1415
1416define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1417; SI-LABEL: add_inline_imm_neg_16_f64:
1418; SI:       ; %bb.0:
1419; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1420; SI-NEXT:    s_mov_b32 s3, 0xf000
1421; SI-NEXT:    s_mov_b32 s2, -1
1422; SI-NEXT:    v_mov_b32_e32 v0, -16
1423; SI-NEXT:    v_mov_b32_e32 v1, -1
1424; SI-NEXT:    s_waitcnt lgkmcnt(0)
1425; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1426; SI-NEXT:    s_endpgm
1427;
1428; VI-LABEL: add_inline_imm_neg_16_f64:
1429; VI:       ; %bb.0:
1430; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1431; VI-NEXT:    s_mov_b32 s3, 0xf000
1432; VI-NEXT:    s_mov_b32 s2, -1
1433; VI-NEXT:    v_mov_b32_e32 v0, -16
1434; VI-NEXT:    v_mov_b32_e32 v1, -1
1435; VI-NEXT:    s_waitcnt lgkmcnt(0)
1436; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1437; VI-NEXT:    s_endpgm
1438  %y = fadd double %x, 0xfffffffffffffff0
1439  store double %y, double addrspace(1)* %out
1440  ret void
1441}
1442
1443define amdgpu_kernel void @add_inline_imm_63_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1444; SI-LABEL: add_inline_imm_63_f64:
1445; SI:       ; %bb.0:
1446; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
1447; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1448; SI-NEXT:    s_waitcnt lgkmcnt(0)
1449; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 63
1450; SI-NEXT:    s_mov_b32 s3, 0xf000
1451; SI-NEXT:    s_mov_b32 s2, -1
1452; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1453; SI-NEXT:    s_endpgm
1454;
1455; VI-LABEL: add_inline_imm_63_f64:
1456; VI:       ; %bb.0:
1457; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
1458; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1459; VI-NEXT:    s_waitcnt lgkmcnt(0)
1460; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 63
1461; VI-NEXT:    s_mov_b32 s3, 0xf000
1462; VI-NEXT:    s_mov_b32 s2, -1
1463; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1464; VI-NEXT:    s_endpgm
1465  %y = fadd double %x, 0x000000000000003F
1466  store double %y, double addrspace(1)* %out
1467  ret void
1468}
1469
1470define amdgpu_kernel void @add_inline_imm_64_f64(double addrspace(1)* %out, [8 x i32], double %x) {
1471; SI-LABEL: add_inline_imm_64_f64:
1472; SI:       ; %bb.0:
1473; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
1474; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
1475; SI-NEXT:    s_waitcnt lgkmcnt(0)
1476; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 64
1477; SI-NEXT:    s_mov_b32 s3, 0xf000
1478; SI-NEXT:    s_mov_b32 s2, -1
1479; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1480; SI-NEXT:    s_endpgm
1481;
1482; VI-LABEL: add_inline_imm_64_f64:
1483; VI:       ; %bb.0:
1484; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
1485; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1486; VI-NEXT:    s_waitcnt lgkmcnt(0)
1487; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 64
1488; VI-NEXT:    s_mov_b32 s3, 0xf000
1489; VI-NEXT:    s_mov_b32 s2, -1
1490; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
1491; VI-NEXT:    s_endpgm
1492  %y = fadd double %x, 0x0000000000000040
1493  store double %y, double addrspace(1)* %out
1494  ret void
1495}
1496
define amdgpu_kernel void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_0.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v1, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_0.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v1, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 0.0, double addrspace(1)* %out
  ret void
}

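; For -0.0 only the sign bit of the high dword is set; v_bfrev_b32 of 1
; produces 0x80000000 without spending a 32-bit literal.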
define amdgpu_kernel void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_literal_imm_neg_0.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_bfrev_b32_e32 v1, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_literal_imm_neg_0.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_bfrev_b32_e32 v1, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double -0.0, double addrspace(1)* %out
  ret void
}

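; 0.5 (and -0.5 below) is an FP inline constant for VALU operands, but the
; high dword of the f64 pattern (0x3fe00000) is not itself an inline f32
; value, so the plain store still needs a literal mov.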
define amdgpu_kernel void @store_inline_imm_0.5_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_0.5_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 0x3fe00000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_0.5_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 0x3fe00000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 0.5, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_0.5_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 0xbfe00000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_0.5_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 0xbfe00000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double -0.5, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_1.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_1.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 0x3ff00000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_1.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 0x3ff00000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 1.0, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_1.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 0xbff00000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_1.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 0xbff00000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double -1.0, double addrspace(1)* %out
  ret void
}

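; The high dword of f64 2.0 is 0x40000000, which happens to be the f32
; inline constant 2.0, so the mov of the high half needs no literal
; (likewise -2.0 / 0xc0000000 below).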
define amdgpu_kernel void @store_inline_imm_2.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_2.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 2.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_2.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 2.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 2.0, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_2.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, -2.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_2.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, -2.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double -2.0, double addrspace(1)* %out
  ret void
}

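; For 4.0 the high dword is 0x40100000 (f32 2.25), which is not an inline
; value, so the high half needs a literal again.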
define amdgpu_kernel void @store_inline_imm_4.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_4.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 0x40100000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_4.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 0x40100000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 4.0, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_4.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 0xc0100000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_4.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 0xc0100000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double -4.0, double addrspace(1)* %out
  ret void
}

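; 0x3fc45f306dc9c882 is 1/(2*pi). VI adds it to the inline constant set for
; FP operands, but materializing the halves of the f64 pattern still takes
; two 32-bit literal movs, so SI and VI emit the same sequence here.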
define amdgpu_kernel void @store_inv_2pi_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inv_2pi_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x6dc9c882
; SI-NEXT:    v_mov_b32_e32 v1, 0x3fc45f30
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inv_2pi_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x6dc9c882
; VI-NEXT:    v_mov_b32_e32 v1, 0x3fc45f30
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 0x3fc45f306dc9c882, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_inv_2pi_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x6dc9c882
; SI-NEXT:    v_mov_b32_e32 v1, 0xbfc45f30
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_inv_2pi_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x6dc9c882
; VI-NEXT:    v_mov_b32_e32 v1, 0xbfc45f30
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 0xbfc45f306dc9c882, double addrspace(1)* %out
  ret void
}

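; 4096.0 has no inline encoding at all; the high dword 0x40b00000 must be a
; 32-bit literal.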
define amdgpu_kernel void @store_literal_imm_f64(double addrspace(1)* %out) {
; SI-LABEL: store_literal_imm_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_mov_b32_e32 v1, 0x40b00000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_literal_imm_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_mov_b32_e32 v1, 0x40b00000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store double 4096.0, double addrspace(1)* %out
  ret void
}

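; Check that a 32-bit literal and its negation are each folded directly into
; the v_mul_f32 operands rather than being materialized with separate moves.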
define amdgpu_vs void @literal_folding(float %arg) {
; GCN-LABEL: literal_folding:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    v_mul_f32_e32 v1, 0x3f4353f8, v0
; GCN-NEXT:    v_mul_f32_e32 v0, 0xbf4353f8, v0
; GCN-NEXT:    exp pos0 v1, v1, v0, v0 done
; GCN-NEXT:    s_endpgm
main_body:
  %tmp = fmul float %arg, 0x3FE86A7F00000000
  %tmp1 = fmul float %arg, 0xBFE86A7F00000000
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0
  ret void
}

declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

attributes #0 = { nounwind }