; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Vertex shader returning { float, float }: element 0 is %arg3 + 1.0 and
; element 1 is %arg3 itself. An export of %arg3 is issued before the add.
; Expected output: %arg3 (v0) is copied into v1, the export reads v0, and an
; s_waitcnt expcnt(0) precedes the add that overwrites v0. The function must
; not end with s_endpgm.
; GCN-LABEL: {{^}}vgpr:
; GCN-DAG: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
; GCN: s_waitcnt expcnt(0)
; GCN: v_add_f32_e32 v0, 1.0, v1
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %sum = fadd float %arg3, 1.000000e+00
  %ret.0 = insertvalue { float, float } undef, float %sum, 0
  %ret.1 = insertvalue { float, float } %ret.0, float %arg3, 1
  ret { float, float } %ret.1
}

; Vertex shader that exports %arg3 and then returns a constant aggregate of
; four floats. Expected output: the export comes first, then v0..v3 are
; loaded with the literals 1.0, 2.0, 4.0 and -1.0 (any order) together with
; an s_waitcnt expcnt(0). The function must not end with s_endpgm.
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: exp mrt0 v0, v0, v0, v0 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}

; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0"). It returns
; five floats taken from %arg4[0], %arg4[1], %arg7[0], %arg8[0] and %arg12.
; Expected output: both config words following .long 165580 and .long 165584
; are 562 (presumably the SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR registers --
; confirm against the AMDGPU backend). The first three return values need no
; copy into v0..v2, while v3 and v4 are filled from v4 and v6.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ; Pull the integer lanes out of the vector inputs.
  %a4.x.i = extractelement <2 x i32> %arg4, i32 0
  %a4.y.i = extractelement <2 x i32> %arg4, i32 1
  %a7.x.i = extractelement <2 x i32> %arg7, i32 0
  %a8.x.i = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret each lane as a float.
  %a4.x = bitcast i32 %a4.x.i to float
  %a4.y = bitcast i32 %a4.y.i to float
  %a7.x = bitcast i32 %a7.x.i to float
  %a8.x = bitcast i32 %a8.x.i to float
  ; Assemble the returned aggregate element by element.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %a4.x, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %a4.y, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %a7.x, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %a8.x, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}

; Pixel shader (attribute group #1) that reads none of its arguments and
; returns the constant 1.0.
; Expected output: both config words following .long 165580 and .long 165584
; are 1 rather than 0, and v0 is loaded with 1.0. The function must not end
; with s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ret float 1.000000e+00
}

; Pixel shader (attribute group #1) returning { float, <2 x float> } built
; from %arg14 and the bits of %arg8.
; Expected output: both config words following .long 165580 and .long 165584
; are 2081; v0 is copied from v4 and v1/v2 from v2/v3 (any order). The
; function must not end with s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN-DAG: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
  ; Reinterpret the whole <2 x i32> input as <2 x float>.
  %a8.f = bitcast <2 x i32> %arg8 to <2 x float>
  %ret.0 = insertvalue { float, <2 x float> } undef, float %arg14, 0
  %ret.1 = insertvalue { float, <2 x float> } %ret.0, <2 x float> %a8.f, 1
  ret { float, <2 x float> } %ret.1
}

; Pixel shader using attribute group #2 ("InitialPSInputAddr"="1"). It returns
; five floats taken from %arg4[0], %arg4[1], %arg7[0], %arg8[0] and %arg12.
; Expected output: the word following .long 165580 stays 562 while the word
; following .long 165584 becomes 563. The return values are copied from
; v2, v3, v4, v6 and v8 into v0..v4. The function must not end with s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
  ; Pull the integer lanes out of the vector inputs.
  %a4.x.i = extractelement <2 x i32> %arg4, i32 0
  %a4.y.i = extractelement <2 x i32> %arg4, i32 1
  %a7.x.i = extractelement <2 x i32> %arg7, i32 0
  %a8.x.i = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret each lane as a float.
  %a4.x = bitcast i32 %a4.x.i to float
  %a4.y = bitcast i32 %a4.y.i to float
  %a7.x = bitcast i32 %a7.x.i to float
  %a8.x = bitcast i32 %a8.x.i to float
  ; Assemble the returned aggregate element by element.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %a4.x, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %a4.y, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %a7.x, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %a8.x, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}

; Pixel shader using attribute group #3 ("InitialPSInputAddr"="119"). It
; returns five floats taken from %arg4[0], %arg4[1], %arg7[0], %arg8[0] and
; %arg12.
; Expected output: the word following .long 165580 stays 562 while the word
; following .long 165584 becomes 631. The return values are copied from
; v2, v3, v6, v8 and v12 into v0..v4 (any order). The function must not end
; with s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN-DAG: v_mov_b32_e32 v2, v6
; GCN-DAG: v_mov_b32_e32 v3, v8
; GCN-DAG: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
  ; Pull the integer lanes out of the vector inputs.
  %a4.x.i = extractelement <2 x i32> %arg4, i32 0
  %a4.y.i = extractelement <2 x i32> %arg4, i32 1
  %a7.x.i = extractelement <2 x i32> %arg7, i32 0
  %a8.x.i = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret each lane as a float.
  %a4.x = bitcast i32 %a4.x.i to float
  %a4.y = bitcast i32 %a4.y.i to float
  %a7.x = bitcast i32 %a7.x.i to float
  %a8.x = bitcast i32 %a8.x.i to float
  ; Assemble the returned aggregate element by element.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %a4.x, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %a4.y, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %a7.x, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %a8.x, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}

; Pixel shader using attribute group #4 ("InitialPSInputAddr"="418"). It
; returns five floats taken from %arg4[0], %arg4[1], %arg7[0], %arg8[0] and
; %arg12.
; Expected output: the word following .long 165580 stays 562 while the word
; following .long 165584 becomes 946. The first three return values need no
; copy into v0..v2, while v3 and v4 are filled from v4 and v8. The function
; must not end with s_endpgm.
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
  ; Pull the integer lanes out of the vector inputs.
  %a4.x.i = extractelement <2 x i32> %arg4, i32 0
  %a4.y.i = extractelement <2 x i32> %arg4, i32 1
  %a7.x.i = extractelement <2 x i32> %arg7, i32 0
  %a8.x.i = extractelement <2 x i32> %arg8, i32 0
  ; Reinterpret each lane as a float.
  %a4.x = bitcast i32 %a4.x.i to float
  %a4.y = bitcast i32 %a4.y.i to float
  %a7.x = bitcast i32 %a7.x.i to float
  %a8.x = bitcast i32 %a8.x.i to float
  ; Assemble the returned aggregate element by element.
  %ret.0 = insertvalue { float, float, float, float, float } undef, float %a4.x, 0
  %ret.1 = insertvalue { float, float, float, float, float } %ret.0, float %a4.y, 1
  %ret.2 = insertvalue { float, float, float, float, float } %ret.1, float %a7.x, 2
  %ret.3 = insertvalue { float, float, float, float, float } %ret.2, float %a8.x, 3
  %ret.4 = insertvalue { float, float, float, float, float } %ret.3, float %arg12, 4
  ret { float, float, float, float, float } %ret.4
}

; Vertex shader returning { i32, i32, i32 } = { %arg2 + 2, %arg1, %arg2 }.
; Every insertvalue chains on the previous aggregate so that all three
; elements are defined. Previously the last insertvalue restarted from %a,
; which left %b dead and element 1 undef.
; Expected output: s0 receives the add result, s1 is copied from s2 (%arg1)
; and s2 is copied from s3 (%arg2). The function must not end with s_endpgm.
; GCN-LABEL: {{^}}sgpr:
; GCN-DAG: s_mov_b32 s1, s2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  %x = add i32 %arg2, 2
  %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
  %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
  ; Continue from %b (not %a) so element 1 is kept in the returned value.
  %c = insertvalue { i32, i32, i32 } %b, i32 %arg2, 2
  ret { i32, i32, i32 } %c
}

; Vertex shader returning the constant aggregate { 5, 6, 7, 8 } of i32.
; Expected output: s0..s3 are loaded with 5, 6, 7 and 8, no redundant
; "s_mov_b32 s0, s0" follows the write of s0, and the function does not end
; with s_endpgm.
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  ; The returned value is a pure constant; no computation is needed here.
  ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}

; Vertex shader returning a mix of float and i32 elements:
; { %arg3 + 1.0, %arg2 + 2, %arg3, %arg1, %arg2 }, with an export of %arg3
; issued first.
; Expected output (any order): the export of v0, a copy of v0 into v1, an
; s_waitcnt expcnt(0), the float add into v0, the integer add into s0, and
; the copies s1 <- s2 and s2 <- s3. The function must not end with s_endpgm.
; GCN-LABEL: {{^}}both:
; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
; GCN-DAG: v_mov_b32_e32 v1, v0
; GCN-DAG: s_mov_b32 s1, s2
; GCN-DAG: s_waitcnt expcnt(0)
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
; GCN-DAG: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  %fsum = fadd float %arg3, 1.000000e+00
  %isum = add i32 %arg2, 2
  ; Assemble the returned aggregate element by element.
  %ret.0 = insertvalue { float, i32, float, i32, i32 } undef, float %fsum, 0
  %ret.1 = insertvalue { float, i32, float, i32, i32 } %ret.0, i32 %isum, 1
  %ret.2 = insertvalue { float, i32, float, i32, i32 } %ret.1, float %arg3, 2
  %ret.3 = insertvalue { float, i32, float, i32, i32 } %ret.2, i32 %arg1, 3
  %ret.4 = insertvalue { float, i32, float, i32, i32 } %ret.3, i32 %arg2, 4
  ret { float, i32, float, i32, i32 } %ret.4
}

; Vertex shader that exports %arg3 and then returns a constant nested
; aggregate { { float, i32 }, { i32, <2 x float> } }.
; Expected output: the export comes first; afterwards (any order) the float
; leaves 1.0, 2.0 and 4.0 are loaded into v0, v1 and v2, the integer leaves
; 2 and 3 into s0 and s1, and an s_waitcnt expcnt(0) is emitted.
; GCN-LABEL: {{^}}structure_literal:
; GCN: exp mrt0 v0, v0, v0, v0 done vm

; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: s_waitcnt expcnt(0)
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}

; Pixel shader with no arguments that only returns an undef float.
; Expected output: the reported code length is exactly 0 bytes.
; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
; GCN: codeLenInByte = 0{{$}}
define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
  ret float undef
}

; Export intrinsic called by the vertex-shader tests in this file.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

; Attribute groups. #0 is plain nounwind; #1 through #4 additionally set
; "InitialPSInputAddr" to 0, 1, 119 and 418 for the pixel-shader tests.
attributes #0 = { nounwind }
attributes #1 = { nounwind "InitialPSInputAddr"="0" }
attributes #2 = { nounwind "InitialPSInputAddr"="1" }
attributes #3 = { nounwind "InitialPSInputAddr"="119" }
attributes #4 = { nounwind "InitialPSInputAddr"="418" }