/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
23 #ifndef R600_ASM_H
24 #define R600_ASM_H
25 
26 #include "util/format/u_format.h"
27 #include "util/list.h"
28 #include "amd_family.h"
29 #include "r600_isa.h"
30 
31 #include <stdbool.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
/*
 * Print an error message to stderr, prefixed with "EE", the source file,
 * line number and enclosing function name.
 *
 * fmt must be a string literal (it is concatenated with the prefix).
 * The macro expands to the fprintf() call itself, so its value is
 * fprintf()'s return value.  The "##" before __VA_ARGS__ drops the
 * trailing comma when no extra arguments are given.
 */
#define R600_ASM_ERR(fmt, ...) \
	fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__)
41 
/*
 * One source operand of an ALU instruction; struct r600_bytecode_alu
 * embeds three of these in its src[] array.
 */
struct r600_bytecode_alu_src {
	unsigned			sel;
	unsigned			chan;
	unsigned			neg;
	unsigned			abs;
	unsigned			rel;
	unsigned			kc_bank;
	unsigned			kc_rel;
	uint32_t			value;
};
52 
/*
 * Destination operand of an ALU instruction; embedded as the dst member
 * of struct r600_bytecode_alu.
 */
struct r600_bytecode_alu_dst {
	unsigned			sel;
	unsigned			chan;
	unsigned			clamp;
	unsigned			write;
	unsigned			rel;
};
60 
61 struct r600_bytecode_alu {
62 	struct list_head		list;
63 	struct r600_bytecode_alu_src		src[3];
64 	struct r600_bytecode_alu_dst		dst;
65 	unsigned			op;
66 	unsigned			last;
67 	unsigned			is_op3;
68 	unsigned			is_lds_idx_op;
69 	unsigned			execute_mask;
70 	unsigned			update_pred;
71 	unsigned			pred_sel;
72 	unsigned			bank_swizzle;
73 	unsigned			bank_swizzle_force;
74 	unsigned			omod;
75 	unsigned                        index_mode;
76 	unsigned                        lds_idx;
77 };
78 
79 struct r600_bytecode_tex {
80 	struct list_head		list;
81 	unsigned			op;
82 	unsigned			inst_mod;
83 	unsigned			resource_id;
84 	unsigned			src_gpr;
85 	unsigned			src_rel;
86 	unsigned			dst_gpr;
87 	unsigned			dst_rel;
88 	unsigned			dst_sel_x;
89 	unsigned			dst_sel_y;
90 	unsigned			dst_sel_z;
91 	unsigned			dst_sel_w;
92 	unsigned			lod_bias;
93 	unsigned			coord_type_x;
94 	unsigned			coord_type_y;
95 	unsigned			coord_type_z;
96 	unsigned			coord_type_w;
97 	int				offset_x;
98 	int				offset_y;
99 	int				offset_z;
100 	unsigned			sampler_id;
101 	unsigned			src_sel_x;
102 	unsigned			src_sel_y;
103 	unsigned			src_sel_z;
104 	unsigned			src_sel_w;
105 	/* indexed samplers/resources only on evergreen/cayman */
106 	unsigned			sampler_index_mode;
107 	unsigned			resource_index_mode;
108 };
109 
110 struct r600_bytecode_vtx {
111 	struct list_head		list;
112 	unsigned			op;
113 	unsigned			fetch_type;
114 	unsigned			buffer_id;
115 	unsigned			src_gpr;
116 	unsigned			src_sel_x;
117 	unsigned			mega_fetch_count;
118 	unsigned			dst_gpr;
119 	unsigned			dst_sel_x;
120 	unsigned			dst_sel_y;
121 	unsigned			dst_sel_z;
122 	unsigned			dst_sel_w;
123 	unsigned			use_const_fields;
124 	unsigned			data_format;
125 	unsigned			num_format_all;
126 	unsigned			format_comp_all;
127 	unsigned			srf_mode_all;
128 	unsigned			offset;
129 	unsigned			endian;
130 	unsigned			buffer_index_mode;
131 
132 	// READ_SCRATCH fields
133 	unsigned			uncached;
134 	unsigned			indexed;
135 	unsigned			src_sel_y;
136 	unsigned			src_rel;
137 	unsigned			elem_size;
138 	unsigned			array_size;
139 	unsigned			array_base;
140 	unsigned			burst_count;
141 	unsigned			dst_rel;
142 };
143 
144 struct r600_bytecode_gds {
145 	struct list_head		list;
146 	unsigned			op;
147 	unsigned			src_gpr;
148 	unsigned			src_rel;
149 	unsigned			src_sel_x;
150 	unsigned			src_sel_y;
151 	unsigned			src_sel_z;
152 	unsigned			src_gpr2;
153 	unsigned			dst_gpr;
154 	unsigned			dst_rel;
155 	unsigned			dst_sel_x;
156 	unsigned			dst_sel_y;
157 	unsigned			dst_sel_z;
158 	unsigned			dst_sel_w;
159 	unsigned			uav_index_mode;
160 	unsigned                        uav_id;
161 	unsigned                        alloc_consume;
162 	unsigned                        bcast_first_req;
163 };
164 
/*
 * Description of one output instruction.  Used by value: it is embedded in
 * struct r600_bytecode_cf (output) and queued in struct r600_bytecode
 * (pending_outputs[]), and it is the argument type of
 * r600_bytecode_add_output() / r600_bytecode_add_pending_output().
 */
struct r600_bytecode_output {
	unsigned			array_base;
	unsigned			array_size;
	unsigned			comp_mask;
	unsigned			type;

	unsigned			op;

	unsigned			elem_size;
	unsigned			gpr;
	unsigned			swizzle_x;
	unsigned			swizzle_y;
	unsigned			swizzle_z;
	unsigned			swizzle_w;
	unsigned			burst_count;
	unsigned			index_gpr;
	unsigned			mark; /* used by MEM_SCRATCH */
};
183 
/* RAT parameters attached to a CF entry (struct r600_bytecode_cf.rat). */
struct r600_bytecode_rat {
	unsigned			id;
	unsigned			inst;
	unsigned			index_mode;
};
189 
/*
 * One kcache slot of a CF entry; struct r600_bytecode_cf carries four of
 * these in its kcache[] array.
 */
struct r600_bytecode_kcache {
	unsigned			bank;
	unsigned			mode;
	unsigned			addr;
	unsigned			index_mode;
};
196 
197 struct r600_bytecode_cf {
198 	struct list_head		list;
199 
200 	unsigned			op;
201 	unsigned			addr;
202 	unsigned			ndw;
203 	unsigned			id;
204 	unsigned			cond;
205 	unsigned			pop_count;
206 	unsigned			count;
207 	unsigned			cf_addr; /* control flow addr */
208 	struct r600_bytecode_kcache		kcache[4];
209 	unsigned			r6xx_uses_waterfall;
210 	unsigned			eg_alu_extended;
211 	unsigned			barrier;
212 	unsigned			end_of_program;
213 	unsigned                        mark;
214 	unsigned                        vpm;
215 	struct list_head		alu;
216 	struct list_head		tex;
217 	struct list_head		vtx;
218 	struct list_head		gds;
219 	struct r600_bytecode_output		output;
220 	struct r600_bytecode_rat		rat;
221 	struct r600_bytecode_alu		*curr_bs_head;
222 	struct r600_bytecode_alu		*prev_bs_head;
223 	struct r600_bytecode_alu		*prev2_bs_head;
224 	unsigned isa[2];
225 	unsigned nlds_read;
226 	unsigned nqueue_read;
227 	unsigned clause_local_written;
228 };
229 
/*
 * FC_* type codes.  Presumably the values stored in
 * struct r600_cf_stack_entry.type -- confirm against the users.
 */
#define FC_NONE				0
#define FC_IF				1
#define FC_LOOP				2
#define FC_REP				3
#define FC_PUSH_VPM			4
#define FC_PUSH_WQM			5
236 
/*
 * One entry of the flow-control stack kept in struct r600_bytecode
 * (fc_stack[]).
 */
struct r600_cf_stack_entry {
	int				type;
	struct r600_bytecode_cf		*start;
	struct r600_bytecode_cf		**mid; /* used to store the else point */
	int				num_mid; /* presumably the number of entries in mid -- confirm */
};
243 
#define SQ_MAX_CALL_DEPTH 0x00000020

/*
 * AR_HANDLE_* codes.  Presumably the values stored in
 * struct r600_bytecode.ar_handling -- confirm against the users.
 */
#define AR_HANDLE_NORMAL 0
#define AR_HANDLE_RV6XX 1 /* except RV670 */
248 
/*
 * Stack bookkeeping for a program; embedded in struct r600_bytecode as
 * the stack member.
 */
struct r600_stack_info {
	/* current level of non-WQM PUSH operations
	 * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE) */
	int push;
	/* current level of WQM PUSH operations
	 * (PUSH, PUSH_ELSE, PUSH_WQM) */
	int push_wqm;
	/* current loop level */
	int loop;

	/* required depth */
	int max_entries;
	/* subentries per entry */
	int entry_size;
};
264 
265 struct r600_bytecode {
266 	enum amd_gfx_level			gfx_level;
267 	enum radeon_family		family;
268 	bool				has_compressed_msaa_texturing;
269 	int				type;
270 	struct list_head		cf;
271 	struct r600_bytecode_cf		*cf_last;
272 	unsigned			ndw;
273 	unsigned			ncf;
274 	unsigned			nalu_groups;
275 	unsigned			ngpr;
276 	unsigned			nstack;
277 	unsigned			nlds_dw;
278 	unsigned			nresource;
279 	unsigned			force_add_cf;
280 	uint32_t			*bytecode;
281 	uint32_t			fc_sp;
282 	struct r600_cf_stack_entry	fc_stack[256];
283 	struct r600_stack_info		stack;
284 	unsigned	ar_loaded;
285 	unsigned	ar_reg;
286 	unsigned	ar_chan;
287 	unsigned        ar_handling;
288 	unsigned        r6xx_nop_after_rel_dst;
289 	bool            index_loaded[2];
290 	unsigned        index_reg[2]; /* indexing register CF_INDEX_[01] */
291 	unsigned        index_reg_chan[2]; /* indexing register channel CF_INDEX_[01] */
292 	unsigned        debug_id;
293 	struct r600_isa* isa;
294 	struct r600_bytecode_output pending_outputs[5];
295 	int n_pending_outputs;
296 	bool			need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */
297 	bool			precise;
298 };
299 
/* eg_asm.c */
int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf);
int eg_bytecode_gds_build(struct r600_bytecode *bc, struct r600_bytecode_gds *gds, unsigned id);
int eg_bytecode_alu_build(struct r600_bytecode *bc,
			  struct r600_bytecode_alu *alu, unsigned id);
/* r600_asm.c */
void r600_bytecode_init(struct r600_bytecode *bc,
			enum amd_gfx_level gfx_level,
			enum radeon_family family,
			bool has_compressed_msaa_texturing);
void r600_bytecode_clear(struct r600_bytecode *bc);
int r600_bytecode_add_alu(struct r600_bytecode *bc,
			  const struct r600_bytecode_alu *alu);
int r600_bytecode_add_vtx(struct r600_bytecode *bc,
			  const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc,
			     const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_tex(struct r600_bytecode *bc,
			  const struct r600_bytecode_tex *tex);
int r600_bytecode_add_gds(struct r600_bytecode *bc,
			  const struct r600_bytecode_gds *gds);
int r600_bytecode_add_output(struct r600_bytecode *bc,
			     const struct r600_bytecode_output *output);
int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
				     const struct r600_bytecode_output *output);

void r600_bytecode_add_ack(struct r600_bytecode *bc);
int r600_bytecode_wait_acks(struct r600_bytecode *bc);
uint32_t r600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect);

int r600_bytecode_build(struct r600_bytecode *bc);
int r600_bytecode_add_cf(struct r600_bytecode *bc);
int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
			     unsigned op);
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
			       const struct r600_bytecode_alu *alu, unsigned type);
void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc,
			    struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
int r600_load_ar(struct r600_bytecode *bc, bool for_src);

int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
343 
/* r700_asm.c */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode,
				const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc,
			    struct r600_bytecode_alu *alu, unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode *bc,
			    struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
int r700_bytecode_fetch_mem_build(struct r600_bytecode *bc,
				  struct r600_bytecode_vtx *mem, unsigned id);

void r600_bytecode_export_read(struct r600_bytecode *bc,
			       struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc,
			     struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);

void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
			   unsigned *num_format, unsigned *format_comp, unsigned *endian);
363 
/*
 * Swap an index with its neighbour inside each pair: 0 <-> 1 and 2 <-> 3.
 * Any other input (negative or greater than 3) yields 0.
 *
 * NOTE(review): presumably i is a channel index used when handling 64-bit
 * operands (going by the name) -- confirm against the callers.
 */
static inline int fp64_switch(int i)
{
	switch (i) {
	case 0:
		return 1;
	case 1:
		return 0;
	case 2:
		return 3;
	case 3:
		return 2;
	default:
		/* Out-of-range input: keep the historical fallback value. */
		return 0;
	}
}
378 
379 #ifdef __cplusplus
380 }
381 #endif
382 
383 #endif
384