1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #ifndef R600_ASM_H
24 #define R600_ASM_H
25
26 #include "util/format/u_format.h"
27 #include "util/list.h"
28 #include "amd_family.h"
29 #include "r600_isa.h"
30
31 #include <stdbool.h>
32 #include <stdint.h>
33 #include <stdio.h>
34
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38
/*
 * Print an assembler error message to stderr.
 *
 * The output is prefixed with "EE <file>:<line> <function> - " and followed by
 * the printf-style format `fmt` and its arguments.  `fmt` must be a string
 * literal because it is concatenated with the prefix at compile time.
 *
 * The macro expands to the fprintf() call itself, so it can be used as an
 * expression and evaluates to fprintf()'s return value.
 *
 * Variadic arguments use the C99 `...` / __VA_ARGS__ spelling rather than the
 * GNU-only named form (`args...`).  The `##` in front of __VA_ARGS__ drops the
 * preceding comma when no argument follows `fmt`.
 */
#define R600_ASM_ERR(fmt, ...) \
    fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)
41
/*
 * Description of one ALU source operand.
 *
 * struct r600_bytecode_alu embeds three of these (src[3]).
 * NOTE(review): field meanings below are read off the names only — confirm
 * against the encoder before relying on them.
 */
struct r600_bytecode_alu_src {
    unsigned sel, chan;         /* presumably register/constant select and channel */
    unsigned neg, abs, rel;     /* presumably operand modifier flags */
    unsigned kc_bank, kc_rel;   /* presumably constant-cache addressing */
    uint32_t value;             /* raw 32-bit value attached to the operand */
};
52
/*
 * Description of the destination of an ALU instruction.
 *
 * Embedded once in struct r600_bytecode_alu as `dst`.
 * NOTE(review): field meanings are inferred from the names — confirm.
 */
struct r600_bytecode_alu_dst {
    unsigned sel, chan;     /* presumably destination register and channel */
    unsigned clamp, write;  /* presumably result clamp / write-enable flags */
    unsigned rel;           /* presumably relative-addressing flag */
};
60
61 struct r600_bytecode_alu {
62 struct list_head list;
63 struct r600_bytecode_alu_src src[3];
64 struct r600_bytecode_alu_dst dst;
65 unsigned op;
66 unsigned last;
67 unsigned is_op3;
68 unsigned is_lds_idx_op;
69 unsigned execute_mask;
70 unsigned update_pred;
71 unsigned pred_sel;
72 unsigned bank_swizzle;
73 unsigned bank_swizzle_force;
74 unsigned omod;
75 unsigned index_mode;
76 unsigned lds_idx;
77 };
78
79 struct r600_bytecode_tex {
80 struct list_head list;
81 unsigned op;
82 unsigned inst_mod;
83 unsigned resource_id;
84 unsigned src_gpr;
85 unsigned src_rel;
86 unsigned dst_gpr;
87 unsigned dst_rel;
88 unsigned dst_sel_x;
89 unsigned dst_sel_y;
90 unsigned dst_sel_z;
91 unsigned dst_sel_w;
92 unsigned lod_bias;
93 unsigned coord_type_x;
94 unsigned coord_type_y;
95 unsigned coord_type_z;
96 unsigned coord_type_w;
97 int offset_x;
98 int offset_y;
99 int offset_z;
100 unsigned sampler_id;
101 unsigned src_sel_x;
102 unsigned src_sel_y;
103 unsigned src_sel_z;
104 unsigned src_sel_w;
105 /* indexed samplers/resources only on evergreen/cayman */
106 unsigned sampler_index_mode;
107 unsigned resource_index_mode;
108 };
109
110 struct r600_bytecode_vtx {
111 struct list_head list;
112 unsigned op;
113 unsigned fetch_type;
114 unsigned buffer_id;
115 unsigned src_gpr;
116 unsigned src_sel_x;
117 unsigned mega_fetch_count;
118 unsigned dst_gpr;
119 unsigned dst_sel_x;
120 unsigned dst_sel_y;
121 unsigned dst_sel_z;
122 unsigned dst_sel_w;
123 unsigned use_const_fields;
124 unsigned data_format;
125 unsigned num_format_all;
126 unsigned format_comp_all;
127 unsigned srf_mode_all;
128 unsigned offset;
129 unsigned endian;
130 unsigned buffer_index_mode;
131
132 // READ_SCRATCH fields
133 unsigned uncached;
134 unsigned indexed;
135 unsigned src_sel_y;
136 unsigned src_rel;
137 unsigned elem_size;
138 unsigned array_size;
139 unsigned array_base;
140 unsigned burst_count;
141 unsigned dst_rel;
142 };
143
144 struct r600_bytecode_gds {
145 struct list_head list;
146 unsigned op;
147 unsigned src_gpr;
148 unsigned src_rel;
149 unsigned src_sel_x;
150 unsigned src_sel_y;
151 unsigned src_sel_z;
152 unsigned src_gpr2;
153 unsigned dst_gpr;
154 unsigned dst_rel;
155 unsigned dst_sel_x;
156 unsigned dst_sel_y;
157 unsigned dst_sel_z;
158 unsigned dst_sel_w;
159 unsigned uav_index_mode;
160 unsigned uav_id;
161 unsigned alloc_consume;
162 unsigned bcast_first_req;
163 };
164
/*
 * Description of one output instruction.
 *
 * Embedded in struct r600_bytecode_cf (`output`) and queued in
 * struct r600_bytecode (`pending_outputs`).
 */
struct r600_bytecode_output {
    unsigned array_base, array_size;
    unsigned comp_mask;
    unsigned type;

    unsigned op;

    unsigned elem_size;
    unsigned gpr;
    unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
    unsigned burst_count;
    unsigned index_gpr;
    unsigned mark; /* used by MEM_SCRATCH */
};
183
/*
 * RAT-related fields of a control-flow entry; embedded in
 * struct r600_bytecode_cf as `rat`.
 */
struct r600_bytecode_rat {
    unsigned id, inst;
    unsigned index_mode;
};
189
/*
 * One kcache slot; struct r600_bytecode_cf holds an array of four (kcache[4]).
 */
struct r600_bytecode_kcache {
    unsigned bank, mode, addr;
    unsigned index_mode;
};
196
197 struct r600_bytecode_cf {
198 struct list_head list;
199
200 unsigned op;
201 unsigned addr;
202 unsigned ndw;
203 unsigned id;
204 unsigned cond;
205 unsigned pop_count;
206 unsigned count;
207 unsigned cf_addr; /* control flow addr */
208 struct r600_bytecode_kcache kcache[4];
209 unsigned r6xx_uses_waterfall;
210 unsigned eg_alu_extended;
211 unsigned barrier;
212 unsigned end_of_program;
213 unsigned mark;
214 unsigned vpm;
215 struct list_head alu;
216 struct list_head tex;
217 struct list_head vtx;
218 struct list_head gds;
219 struct r600_bytecode_output output;
220 struct r600_bytecode_rat rat;
221 struct r600_bytecode_alu *curr_bs_head;
222 struct r600_bytecode_alu *prev_bs_head;
223 struct r600_bytecode_alu *prev2_bs_head;
224 unsigned isa[2];
225 unsigned nlds_read;
226 unsigned nqueue_read;
227 unsigned clause_local_written;
228 };
229
/*
 * FC_* type codes.
 * NOTE(review): presumably the values stored in r600_cf_stack_entry.type —
 * confirm against the users of fc_stack.
 */
#define FC_NONE        0
#define FC_IF          1
#define FC_LOOP        2
#define FC_REP         3
#define FC_PUSH_VPM    4
#define FC_PUSH_WQM    5
236
/*
 * One entry of the flow-control stack kept in struct r600_bytecode
 * (fc_stack[]).
 */
struct r600_cf_stack_entry {
    int type;
    struct r600_bytecode_cf *start;
    /* Array of CF pointers; used to store the else point. */
    struct r600_bytecode_cf **mid;
    /* NOTE(review): presumably the number of entries in `mid` — confirm. */
    int num_mid;
};
243
/* Maximum call depth (32). */
#define SQ_MAX_CALL_DEPTH    0x00000020

/*
 * AR handling modes.
 * NOTE(review): presumably the values of r600_bytecode.ar_handling — confirm.
 */
#define AR_HANDLE_NORMAL     0
#define AR_HANDLE_RV6XX      1 /* except RV670 */
248
/*
 * Stack bookkeeping; embedded in struct r600_bytecode as `stack`.
 */
struct r600_stack_info {
    /* Current level of non-WQM PUSH operations
     * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE). */
    int push;
    /* Current level of WQM PUSH operations
     * (PUSH, PUSH_ELSE, PUSH_WQM). */
    int push_wqm;
    /* Current loop nesting level. */
    int loop;

    /* Required stack depth. */
    int max_entries;
    /* Number of subentries per entry. */
    int entry_size;
};
264
265 struct r600_bytecode {
266 enum amd_gfx_level gfx_level;
267 enum radeon_family family;
268 bool has_compressed_msaa_texturing;
269 int type;
270 struct list_head cf;
271 struct r600_bytecode_cf *cf_last;
272 unsigned ndw;
273 unsigned ncf;
274 unsigned nalu_groups;
275 unsigned ngpr;
276 unsigned nstack;
277 unsigned nlds_dw;
278 unsigned nresource;
279 unsigned force_add_cf;
280 uint32_t *bytecode;
281 uint32_t fc_sp;
282 struct r600_cf_stack_entry fc_stack[256];
283 struct r600_stack_info stack;
284 unsigned ar_loaded;
285 unsigned ar_reg;
286 unsigned ar_chan;
287 unsigned ar_handling;
288 unsigned r6xx_nop_after_rel_dst;
289 bool index_loaded[2];
290 unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
291 unsigned index_reg_chan[2]; /* indexing register channel CF_INDEX_[01] */
292 unsigned debug_id;
293 struct r600_isa* isa;
294 struct r600_bytecode_output pending_outputs[5];
295 int n_pending_outputs;
296 bool need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */
297 bool precise;
298 };
299
/* Functions from eg_asm.c */
int eg_bytecode_cf_build(struct r600_bytecode *bc,
                         struct r600_bytecode_cf *cf);
int eg_bytecode_gds_build(struct r600_bytecode *bc,
                          struct r600_bytecode_gds *gds,
                          unsigned id);
int eg_bytecode_alu_build(struct r600_bytecode *bc,
                          struct r600_bytecode_alu *alu,
                          unsigned id);
/* Functions from r600_asm.c */

/* Set-up and tear-down of the assembler state. */
void r600_bytecode_init(struct r600_bytecode *bc, enum amd_gfx_level gfx_level,
                        enum radeon_family family, bool has_compressed_msaa_texturing);
void r600_bytecode_clear(struct r600_bytecode *bc);

/* Add one instruction record; the record is passed as a const pointer. */
int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu);
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex);
int r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds);
int r600_bytecode_add_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output);
int r600_bytecode_add_pending_output(struct r600_bytecode *bc, const struct r600_bytecode_output *output);

/* ACK handling. */
void r600_bytecode_add_ack(struct r600_bytecode *bc);
int r600_bytecode_wait_acks(struct r600_bytecode *bc);
uint32_t r600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect);

int r600_bytecode_build(struct r600_bytecode *bc);
int r600_bytecode_add_cf(struct r600_bytecode *bc);
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op);
int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
                               unsigned type);
void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
                            uint32_t word0, uint32_t word1);
int r600_load_ar(struct r600_bytecode *bc, bool for_src);

int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
343
/* Functions from r700_asm.c */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
                            unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
                            uint32_t word0, uint32_t word1);
int r700_bytecode_fetch_mem_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *mem,
                                  unsigned id);

/* Fill an output record from two 32-bit instruction words. */
void r600_bytecode_export_read(struct r600_bytecode *bc, struct r600_bytecode_output *output,
                               uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc, struct r600_bytecode_output *output,
                             uint32_t word0, uint32_t word1);

/*
 * Translate a pipe_format; results are returned through the four
 * out-pointers (format, num_format, format_comp, endian).
 */
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format,
                           unsigned *format_comp, unsigned *endian);
363
/*
 * Swap an index with its neighbour inside each pair: 0 <-> 1 and 2 <-> 3.
 *
 * @param i  index to map
 * @return   1, 0, 3, 2 for i = 0, 1, 2, 3 respectively; 0 for any other value
 *
 * NOTE(review): the name suggests this is used for fp64 channel handling —
 * confirm against the callers.
 */
static inline int fp64_switch(int i)
{
    switch (i) {
    case 0:
        return 1;
    case 1:
        return 0;
    case 2:
        return 3;
    case 3:
        return 2;
    default:
        /* Out-of-range input keeps the existing fallback result of 0. */
        return 0;
    }
}
378
379 #ifdef __cplusplus
380 }
381 #endif
382
383 #endif
384