/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors: Marek Olšák <maraeo@gmail.com>
 *
 */

/**
 * This file contains common screen and context structures and functions
 * for r600g and radeonsi.
 */
31 
32 #ifndef R600_PIPE_COMMON_H
33 #define R600_PIPE_COMMON_H
34 
35 #include <stdio.h>
36 
37 #include "radeon/radeon_winsys.h"
38 
39 #include "util/u_blitter.h"
40 #include "util/list.h"
41 #include "util/u_range.h"
42 #include "util/slab.h"
43 #include "util/u_suballoc.h"
44 #include "util/u_transfer.h"
45 
46 #define ATI_VENDOR_ID 0x1002
47 
48 #define R600_RESOURCE_FLAG_TRANSFER		(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
49 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
50 #define R600_RESOURCE_FLAG_FORCE_TILING		(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
51 #define R600_RESOURCE_FLAG_DISABLE_DCC		(PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
52 
53 #define R600_CONTEXT_STREAMOUT_FLUSH		(1u << 0)
54 /* Pipeline & streamout query controls. */
55 #define R600_CONTEXT_START_PIPELINE_STATS	(1u << 1)
56 #define R600_CONTEXT_STOP_PIPELINE_STATS	(1u << 2)
57 #define R600_CONTEXT_PRIVATE_FLAG		(1u << 3)
58 
59 /* special primitive types */
60 #define R600_PRIM_RECTANGLE_LIST	PIPE_PRIM_MAX
61 
62 /* Debug flags. */
63 /* logging */
64 #define DBG_TEX			(1 << 0)
65 /* gap - reuse */
66 #define DBG_COMPUTE		(1 << 2)
67 #define DBG_VM			(1 << 3)
68 /* gap - reuse */
69 /* shader logging */
70 #define DBG_FS			(1 << 5)
71 #define DBG_VS			(1 << 6)
72 #define DBG_GS			(1 << 7)
73 #define DBG_PS			(1 << 8)
74 #define DBG_CS			(1 << 9)
75 #define DBG_TCS			(1 << 10)
76 #define DBG_TES			(1 << 11)
77 #define DBG_NO_IR		(1 << 12)
78 #define DBG_NO_TGSI		(1 << 13)
79 #define DBG_NO_ASM		(1 << 14)
80 #define DBG_PREOPT_IR		(1 << 15)
81 #define DBG_CHECK_IR		(1 << 16)
82 #define DBG_NO_OPT_VARIANT	(1 << 17)
83 /* gaps */
84 #define DBG_TEST_DMA		(1 << 20)
85 /* Bits 21-31 are reserved for the r600g driver. */
86 /* features */
87 #define DBG_NO_ASYNC_DMA	(1llu << 32)
88 #define DBG_NO_HYPERZ		(1llu << 33)
89 #define DBG_NO_DISCARD_RANGE	(1llu << 34)
90 #define DBG_NO_2D_TILING	(1llu << 35)
91 #define DBG_NO_TILING		(1llu << 36)
92 #define DBG_SWITCH_ON_EOP	(1llu << 37)
93 #define DBG_FORCE_DMA		(1llu << 38)
94 #define DBG_PRECOMPILE		(1llu << 39)
95 #define DBG_INFO		(1llu << 40)
96 #define DBG_NO_WC		(1llu << 41)
97 #define DBG_CHECK_VM		(1llu << 42)
98 #define DBG_NO_DCC		(1llu << 43)
99 #define DBG_NO_DCC_CLEAR	(1llu << 44)
100 #define DBG_NO_RB_PLUS		(1llu << 45)
101 #define DBG_SI_SCHED		(1llu << 46)
102 #define DBG_MONOLITHIC_SHADERS	(1llu << 47)
103 #define DBG_NO_CE		(1llu << 48)
104 #define DBG_UNSAFE_MATH		(1llu << 49)
105 #define DBG_NO_DCC_FB		(1llu << 50)
106 
107 #define R600_MAP_BUFFER_ALIGNMENT 64
108 #define R600_MAX_VIEWPORTS        16
109 
110 #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
111 
/* Cache-coherency domain a buffer write must become visible to; used by
 * the clear_buffer hook to decide which cache flushes are needed. */
enum r600_coherency {
	R600_COHERENCY_NONE, /* no cache flushes needed */
	R600_COHERENCY_SHADER, /* make the write visible to shader loads/stores */
	R600_COHERENCY_CB_META, /* make it visible to CB metadata (fast-clear paths) — assumed; confirm at call sites */
};
117 
118 #ifdef PIPE_ARCH_BIG_ENDIAN
119 #define R600_BIG_ENDIAN 1
120 #else
121 #define R600_BIG_ENDIAN 0
122 #endif
123 
124 struct r600_common_context;
125 struct r600_perfcounters;
126 struct tgsi_shader_info;
127 struct r600_qbo_state;
128 
/** A named relocation recorded in a compiled shader binary. */
struct radeon_shader_reloc {
	char name[32];   /* symbol name */
	uint64_t offset; /* presumably the byte offset of the reloc in the code — confirm */
};
133 
/** Output of the shader compiler: machine code plus the metadata needed
 * to upload, patch, and debug it. */
struct radeon_shader_binary {
	/** Shader code */
	unsigned char *code;
	unsigned code_size;

	/** Config/Context register state that accompanies this shader.
	 * This is a stream of dword pairs.  First dword contains the
	 * register address, the second dword contains the value.*/
	unsigned char *config;
	unsigned config_size;

	/** The number of bytes of config information for each global symbol.
	 */
	unsigned config_size_per_symbol;

	/** Constant data accessed by the shader.  This will be uploaded
	 * into a constant buffer. */
	unsigned char *rodata;
	unsigned rodata_size;

	/** List of symbol offsets for the shader */
	uint64_t *global_symbol_offsets;
	unsigned global_symbol_count;

	/** Relocations to apply to the code; see radeon_shader_reloc. */
	struct radeon_shader_reloc *relocs;
	unsigned reloc_count;

	/** Disassembled shader in a string. */
	char *disasm_string;
	char *llvm_ir_string; /* LLVM IR dump of the shader, if available */
};
165 
166 void radeon_shader_binary_init(struct radeon_shader_binary *b);
167 void radeon_shader_binary_clean(struct radeon_shader_binary *b);
168 
169 /* Only 32-bit buffer allocations are supported, gallium doesn't support more
170  * at the moment.
171  */
struct r600_resource {
	struct u_resource		b; /* base class (gallium resource) */

	/* Winsys objects. */
	struct pb_buffer		*buf;
	uint64_t			gpu_address; /* GPU virtual address of the buffer */
	/* Memory usage if the buffer placement is optimal. */
	uint64_t			vram_usage;
	uint64_t			gart_usage;

	/* Resource properties. */
	uint64_t			bo_size;
	unsigned			bo_alignment;
	enum radeon_bo_domain		domains; /* allowed placements (VRAM/GTT) */
	enum radeon_bo_flag		flags;
	unsigned			bind_history; /* accumulated bind points — presumably PIPE_BIND_* bits; confirm */

	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsynchronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
	 */
	struct util_range		valid_buffer_range;

	/* For buffers only. This indicates that a write operation has been
	 * performed by TC L2, but the cache hasn't been flushed.
	 * Any hw block which doesn't use or bypasses TC L2 should check this
	 * flag and flush the cache before using the buffer.
	 *
	 * For example, TC L2 must be flushed if a buffer which has been
	 * modified by a shader store instruction is about to be used as
	 * an index buffer. The reason is that VGT DMA index fetching doesn't
	 * use TC L2.
	 */
	bool				TC_L2_dirty;

	/* Whether the resource has been exported via resource_get_handle. */
	bool				is_shared;
	unsigned			external_usage; /* PIPE_HANDLE_USAGE_* */
};
215 
/* Driver-private transfer object; "staging" holds the intermediate copy
 * when the resource cannot be mapped directly. */
struct r600_transfer {
	struct pipe_transfer		transfer; /* base class */
	struct r600_resource		*staging; /* staging resource, or NULL for direct maps */
	unsigned			offset;   /* offset within the staging resource — assumed; confirm */
};
221 
/* Layout parameters of a texture's FMASK surface (MSAA colorbuffer
 * compression metadata). */
struct r600_fmask_info {
	uint64_t offset;    /* byte offset from the start of the texture */
	uint64_t size;
	unsigned alignment;
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
};
231 
/* Layout parameters of a texture's CMASK surface (fast color clear
 * metadata). */
struct r600_cmask_info {
	uint64_t offset;    /* byte offset from the start of the texture */
	uint64_t size;
	unsigned alignment;
	unsigned slice_tile_max;
	unsigned base_address_reg;
};
239 
/* Driver-private texture: base resource plus surface layout, compression
 * metadata (FMASK/CMASK/DCC/HTILE), and clear state. */
struct r600_texture {
	struct r600_resource		resource; /* base class */

	uint64_t			size;
	unsigned			num_level0_transfers;
	enum pipe_format		db_render_format;
	bool				is_depth;
	bool				db_compatible;
	bool				can_sample_z;
	bool				can_sample_s;
	unsigned			dirty_level_mask; /* each bit says if that mipmap is compressed */
	unsigned			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
	struct r600_texture		*flushed_depth_texture;
	struct radeon_surf		surface; /* layout computed by the winsys/addrlib */

	/* Colorbuffer compression and fast clear. */
	struct r600_fmask_info		fmask;
	struct r600_cmask_info		cmask;
	struct r600_resource		*cmask_buffer;
	uint64_t			dcc_offset; /* 0 = disabled */
	unsigned			cb_color_info; /* fast clear enable bit */
	unsigned			color_clear_value[2];
	unsigned			last_msaa_resolve_target_micro_mode;

	/* Depth buffer compression and fast clear. */
	struct r600_resource		*htile_buffer;
	bool				tc_compatible_htile;
	bool				depth_cleared; /* if it was cleared at least once */
	float				depth_clear_value;
	bool				stencil_cleared; /* if it was cleared at least once */
	uint8_t				stencil_clear_value;

	bool				non_disp_tiling; /* R600-Cayman only */

	/* Whether the texture is a displayable back buffer and needs DCC
	 * decompression, which is expensive. Therefore, it's enabled only
	 * if statistics suggest that it will pay off and it's allocated
	 * separately. It can't be bound as a sampler by apps. Limited to
	 * target == 2D and last_level == 0. If enabled, dcc_offset contains
	 * the absolute GPUVM address, not the relative one.
	 */
	struct r600_resource		*dcc_separate_buffer;
	/* When DCC is temporarily disabled, the separate buffer is here. */
	struct r600_resource		*last_dcc_separate_buffer;
	/* We need to track DCC dirtiness, because st/dri usually calls
	 * flush_resource twice per frame (not a bug) and we don't wanna
	 * decompress DCC twice. Also, the dirty tracking must be done even
	 * if DCC isn't used, because it's required by the DCC usage analysis
	 * for a possible future enablement.
	 */
	bool				separate_dcc_dirty;
	/* Statistics gathering for the DCC enablement heuristic. */
	bool				dcc_gather_statistics;
	/* Estimate of how much this color buffer is written to in units of
	 * full-screen draws: ps_invocations / (width * height)
	 * Shader kills, late Z, and blending with trivial discards make it
	 * inaccurate (we need to count CB updates, not PS invocations).
	 */
	unsigned			ps_draw_ratio;
	/* The number of clears since the last DCC usage analysis. */
	unsigned			num_slow_clears;

	/* Counter that should be non-zero if the texture is bound to a
	 * framebuffer. Implemented in radeonsi only.
	 */
	uint32_t			framebuffers_bound;
};
307 
/* Driver-private pipe_surface holding precomputed colorbuffer (CB) and
 * depthbuffer (DB) register values for the surface. */
struct r600_surface {
	struct pipe_surface		base; /* base class */

	bool color_initialized; /* whether the CB register fields below are valid */
	bool depth_initialized; /* whether the DB register fields below are valid */

	/* Misc. color flags. */
	bool alphatest_bypass;
	bool export_16bpc;
	bool color_is_int8;
	bool color_is_int10;

	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_base;
	unsigned cb_color_view;
	unsigned cb_color_size;		/* R600 only */
	unsigned cb_color_dim;		/* EG only */
	unsigned cb_color_pitch;	/* EG and later */
	unsigned cb_color_slice;	/* EG and later */
	unsigned cb_color_attrib;	/* EG and later */
	unsigned cb_dcc_control;	/* VI and later */
	unsigned cb_color_fmask;	/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
	unsigned cb_color_fmask_slice;	/* EG and later */
	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
	unsigned cb_color_mask;		/* R600 only */
	unsigned spi_shader_col_format;		/* SI+, no blending, no alpha-to-coverage. */
	unsigned spi_shader_col_format_alpha;	/* SI+, alpha-to-coverage */
	unsigned spi_shader_col_format_blend;	/* SI+, blending without alpha. */
	unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
	struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */

	/* DB registers. */
	unsigned db_depth_info;		/* R600 only, then SI and later */
	unsigned db_z_info;		/* EG and later */
	unsigned db_depth_base;		/* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice;	/* EG and later */
	unsigned db_stencil_base;	/* EG and later */
	unsigned db_stencil_info;	/* EG and later */
	unsigned db_prefetch_limit;	/* R600 only */
	unsigned db_htile_surface;
	unsigned db_htile_data_base;
	unsigned db_preload_control;	/* EG and later */
};
355 
/* GRBM busy/idle counters, accessible either by name or by index.
 * "array" is a zero-length overlay of the named fields (GNU extension). */
union r600_grbm_counters {
	struct {
		unsigned spi_busy;
		unsigned spi_idle;
		unsigned gui_busy;
		unsigned gui_idle;
	} named;
	unsigned array[0];
};
365 
/* Screen (per-device) state shared by all contexts of a driver instance. */
struct r600_common_screen {
	struct pipe_screen		b; /* base class */
	struct radeon_winsys		*ws;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct radeon_info		info;
	uint64_t			debug_flags; /* mask of the DBG_* flags defined above */
	bool				has_cp_dma;
	bool				has_streamout;

	struct slab_parent_pool		pool_transfers;

	/* Texture filter settings. */
	int				force_aniso; /* -1 = disabled */

	/* Auxiliary context. Mainly used to initialize resources.
	 * It must be locked prior to using and flushed before unlocking. */
	struct pipe_context		*aux_context;
	pipe_mutex			aux_context_lock;

	/* This must be in the screen, because UE4 uses one context for
	 * compilation and another one for rendering.
	 */
	unsigned			num_compilations;
	/* Along with ST_DEBUG=precompile, this should show if applications
	 * are loading shaders on demand. This is a monotonic counter.
	 */
	unsigned			num_shaders_created;
	unsigned			num_shader_cache_hits;

	/* GPU load thread. */
	pipe_mutex			gpu_load_mutex;
	pipe_thread			gpu_load_thread;
	union r600_grbm_counters	grbm_counters;
	volatile unsigned		gpu_load_stop_thread; /* bool */

	char				renderer_string[100];

	/* Performance counters. */
	struct r600_perfcounters	*perfcounters;

	/* If pipe_screen wants to re-emit the framebuffer state of all
	 * contexts, it should atomically increment this. Each context will
	 * compare this with its own last known value of the counter before
	 * drawing and re-emit the framebuffer state accordingly.
	 */
	unsigned			dirty_fb_counter;

	/* Atomically increment this counter when an existing texture's
	 * metadata is enabled or disabled in a way that requires changing
	 * contexts' compressed texture binding masks.
	 */
	unsigned			compressed_colortex_counter;

	/* Atomically increment this counter when an existing texture's
	 * backing buffer or tile mode parameters have changed that requires
	 * recomputation of shader descriptors.
	 */
	unsigned			dirty_tex_descriptor_counter;

	struct {
		/* Context flags to set so that all writes from earlier jobs
		 * in the CP are seen by L2 clients.
		 */
		unsigned cp_to_L2;

		/* Context flags to set so that all writes from earlier
		 * compute jobs are seen by L2 clients.
		 */
		unsigned compute_to_L2;
	} barrier_flags;

	/* Driver-specific (r600g/radeonsi) hooks for reading/writing opaque
	 * buffer metadata; NOTE(review): may be NULL — confirm at call sites. */
	void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
				      struct r600_texture *rtex,
				      struct radeon_bo_metadata *md);

	void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
				    struct r600_texture *rtex,
				    struct radeon_bo_metadata *md);
};
446 
/* This encapsulates a state or an operation which can be emitted into the
 * GPU command stream. */
struct r600_atom {
	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
	unsigned		num_dw; /* presumably the dword budget for emit() — confirm */
	unsigned short		id;
};
454 
/* Stream-output (transform feedback) target; tracks where the hardware
 * stores BUFFER_FILLED_SIZE for this binding. */
struct r600_so_target {
	struct pipe_stream_output_target b; /* base class */

	/* The buffer where BUFFER_FILLED_SIZE is stored. */
	struct r600_resource	*buf_filled_size;
	unsigned		buf_filled_size_offset;
	bool			buf_filled_size_valid;

	unsigned		stride_in_dw; /* vertex stride in dwords */
};
465 
/* Per-context streamout (transform feedback) state. */
struct r600_streamout {
	struct r600_atom		begin_atom;
	bool				begin_emitted;
	unsigned			num_dw_for_end;

	unsigned			enabled_mask; /* bitmask of bound targets */
	unsigned			num_targets;
	struct r600_so_target		*targets[PIPE_MAX_SO_BUFFERS];

	unsigned			append_bitmask;
	bool				suspended;

	/* External state which comes from the vertex shader,
	 * it must be set explicitly when binding a shader. */
	unsigned			*stride_in_dw;
	unsigned			enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
	unsigned			hw_enabled_mask;

	/* The state of VGT_STRMOUT_(CONFIG|EN). */
	struct r600_atom		enable_atom;
	bool				streamout_enabled;
	bool				prims_gen_query_enabled;
	int				num_prims_gen_queries;
};
492 
/* A scissor rectangle stored with signed coordinates (viewport-derived
 * bounds can be negative). */
struct r600_signed_scissor {
	int minx;
	int miny;
	int maxx;
	int maxy;
};
499 
/* Per-context scissor state for all viewports, emitted via "atom". */
struct r600_scissors {
	struct r600_atom		atom;
	unsigned			dirty_mask; /* one bit per viewport that needs re-emit */
	struct pipe_scissor_state	states[R600_MAX_VIEWPORTS];
};
505 
/* Per-context viewport state for all viewports, emitted via "atom". */
struct r600_viewports {
	struct r600_atom		atom;
	unsigned			dirty_mask; /* one bit per viewport that needs re-emit */
	unsigned			depth_range_dirty_mask;
	struct pipe_viewport_state	states[R600_MAX_VIEWPORTS];
	struct r600_signed_scissor	as_scissor[R600_MAX_VIEWPORTS]; /* viewports converted to scissors */
};
513 
/* A command stream (gfx or dma ring) together with its flush callback. */
struct r600_ring {
	struct radeon_winsys_cs		*cs;
	void (*flush)(void *ctx, unsigned flags,
		      struct pipe_fence_handle **fence);
};
519 
/* Saved CS data for debugging features. */
struct radeon_saved_cs {
	uint32_t			*ib;      /* copy of the indirect buffer contents */
	unsigned			num_dw;

	struct radeon_bo_list_item	*bo_list; /* buffers referenced by the CS */
	unsigned			bo_count;
};
528 
/* Per-context state shared by r600g and radeonsi, including the gfx/dma
 * rings, query bookkeeping, stats, and driver-specific hooks. */
struct r600_common_context {
	struct pipe_context b; /* base class */

	struct r600_common_screen	*screen;
	struct radeon_winsys		*ws;
	struct radeon_winsys_ctx	*ctx;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct r600_ring		gfx;
	struct r600_ring		dma;
	struct pipe_fence_handle	*last_gfx_fence;
	struct pipe_fence_handle	*last_sdma_fence;
	unsigned			num_gfx_cs_flushes;
	unsigned			initial_gfx_cs_size;
	unsigned			gpu_reset_counter;
	/* Last seen values of the screen-wide invalidation counters
	 * (see r600_common_screen). */
	unsigned			last_dirty_fb_counter;
	unsigned			last_compressed_colortex_counter;
	unsigned			last_dirty_tex_descriptor_counter;

	struct u_upload_mgr		*uploader;
	struct u_suballocator		*allocator_zeroed_memory;
	struct slab_child_pool		pool_transfers;

	/* Current unaccounted memory usage. */
	uint64_t			vram;
	uint64_t			gtt;

	/* States. */
	struct r600_streamout		streamout;
	struct r600_scissors		scissors;
	struct r600_viewports		viewports;
	bool				scissor_enabled;
	bool				clip_halfz;
	bool				vs_writes_viewport_index;
	bool				vs_disables_clipping_viewport;

	/* Additional context states. */
	unsigned flags; /* flush flags (R600_CONTEXT_* bits defined above) */

	/* Queries. */
	/* Maintain the list of active queries for pausing between IBs. */
	int				num_occlusion_queries;
	int				num_perfect_occlusion_queries;
	struct list_head		active_queries;
	unsigned			num_cs_dw_queries_suspend;
	/* Additional hardware info. */
	unsigned			backend_mask;
	unsigned			max_db; /* for OQ */
	/* Misc stats. */
	unsigned			num_draw_calls;
	unsigned			num_spill_draw_calls;
	unsigned			num_compute_calls;
	unsigned			num_spill_compute_calls;
	unsigned			num_dma_calls;
	unsigned			num_cp_dma_calls;
	unsigned			num_vs_flushes;
	unsigned			num_ps_flushes;
	unsigned			num_cs_flushes;
	unsigned			num_fb_cache_flushes;
	unsigned			num_L2_invalidates;
	unsigned			num_L2_writebacks;
	uint64_t			num_alloc_tex_transfer_bytes;
	unsigned			last_tex_ps_draw_ratio; /* for query */

	/* Render condition. */
	struct r600_atom		render_cond_atom;
	struct pipe_query		*render_cond;
	unsigned			render_cond_mode;
	bool				render_cond_invert;
	bool				render_cond_force_off; /* for u_blitter */

	/* MSAA sample locations.
	 * The first index is the sample index.
	 * The second index is the coordinate: X, Y. */
	float				sample_locations_1x[1][2];
	float				sample_locations_2x[2][2];
	float				sample_locations_4x[4][2];
	float				sample_locations_8x[8][2];
	float				sample_locations_16x[16][2];

	/* Statistics gathering for the DCC enablement heuristic. It can't be
	 * in r600_texture because r600_texture can be shared by multiple
	 * contexts. This is for back buffers only. We shouldn't get too many
	 * of those.
	 *
	 * X11 DRI3 rotates among a finite set of back buffers. They should
	 * all fit in this array. If they don't, separate DCC might never be
	 * enabled by DCC stat gathering.
	 */
	struct {
		struct r600_texture		*tex;
		/* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
		struct pipe_query		*ps_stats[3];
		/* If all slots are used and another slot is needed,
		 * the least recently used slot is evicted based on this. */
		int64_t				last_use_timestamp;
		bool				query_active;
	} dcc_stats[5];

	struct pipe_debug_callback	debug;
	struct pipe_device_reset_callback device_reset_callback;

	void				*query_result_shader;

	/* Copy one resource to another using async DMA. */
	void (*dma_copy)(struct pipe_context *ctx,
			 struct pipe_resource *dst,
			 unsigned dst_level,
			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
			 struct pipe_resource *src,
			 unsigned src_level,
			 const struct pipe_box *src_box);

	/* Clear a buffer range using the DMA engine. */
	void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
				 uint64_t offset, uint64_t size, unsigned value);

	/* Clear a buffer range, flushing caches per the coherency domain. */
	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
			     uint64_t offset, uint64_t size, unsigned value,
			     enum r600_coherency coher);

	void (*blit_decompress_depth)(struct pipe_context *ctx,
				      struct r600_texture *texture,
				      struct r600_texture *staging,
				      unsigned first_level, unsigned last_level,
				      unsigned first_layer, unsigned last_layer,
				      unsigned first_sample, unsigned last_sample);

	void (*decompress_dcc)(struct pipe_context *ctx,
			       struct r600_texture *rtex);

	/* Reallocate the buffer and update all resource bindings where
	 * the buffer is bound, including all resource descriptors. */
	void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);

	/* Enable or disable occlusion queries. */
	void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable);

	void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);

	/* This ensures there is enough space in the command stream. */
	void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
				  bool include_draw_vbo);

	void (*set_atom_dirty)(struct r600_common_context *ctx,
			       struct r600_atom *atom, bool dirty);

	void (*check_vm_faults)(struct r600_common_context *ctx,
				struct radeon_saved_cs *saved,
				enum ring_type ring);
};
679 
680 /* r600_buffer.c */
681 bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
682 				     struct pb_buffer *buf,
683 				     enum radeon_bo_usage usage);
684 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
685                                       struct r600_resource *resource,
686                                       unsigned usage);
687 void r600_buffer_subdata(struct pipe_context *ctx,
688 			 struct pipe_resource *buffer,
689 			 unsigned usage, unsigned offset,
690 			 unsigned size, const void *data);
691 void r600_init_resource_fields(struct r600_common_screen *rscreen,
692 			       struct r600_resource *res,
693 			       uint64_t size, unsigned alignment);
694 bool r600_alloc_resource(struct r600_common_screen *rscreen,
695 			 struct r600_resource *res);
696 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
697 					 const struct pipe_resource *templ,
698 					 unsigned alignment);
699 struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
700 						  unsigned bind,
701 						  unsigned usage,
702 						  unsigned size,
703 						  unsigned alignment);
704 struct pipe_resource *
705 r600_buffer_from_user_memory(struct pipe_screen *screen,
706 			     const struct pipe_resource *templ,
707 			     void *user_memory);
708 void
709 r600_invalidate_resource(struct pipe_context *ctx,
710 			 struct pipe_resource *resource);
711 
712 /* r600_common_pipe.c */
713 void r600_gfx_write_event_eop(struct r600_common_context *ctx,
714 			      unsigned event, unsigned event_flags,
715 			      unsigned data_sel,
716 			      struct r600_resource *buf, uint64_t va,
717 			      uint32_t old_fence, uint32_t new_fence);
718 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
719 void r600_gfx_wait_fence(struct r600_common_context *ctx,
720 			 uint64_t va, uint32_t ref, uint32_t mask);
721 void r600_draw_rectangle(struct blitter_context *blitter,
722 			 int x1, int y1, int x2, int y2, float depth,
723 			 enum blitter_attrib_type type,
724 			 const union pipe_color_union *attrib);
725 bool r600_common_screen_init(struct r600_common_screen *rscreen,
726 			     struct radeon_winsys *ws);
727 void r600_destroy_common_screen(struct r600_common_screen *rscreen);
728 void r600_preflush_suspend_features(struct r600_common_context *ctx);
729 void r600_postflush_resume_features(struct r600_common_context *ctx);
730 bool r600_common_context_init(struct r600_common_context *rctx,
731 			      struct r600_common_screen *rscreen,
732 			      unsigned context_flags);
733 void r600_common_context_cleanup(struct r600_common_context *rctx);
734 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
735 			  unsigned processor);
736 bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
737 			      unsigned processor);
738 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
739 			      uint64_t offset, uint64_t size, unsigned value);
740 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
741 						  const struct pipe_resource *templ);
742 const char *r600_get_llvm_processor_name(enum radeon_family family);
743 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
744 			 struct r600_resource *dst, struct r600_resource *src);
745 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
746 		    struct radeon_saved_cs *saved);
747 void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
748 bool r600_check_device_reset(struct r600_common_context *rctx);
749 
750 /* r600_gpu_load.c */
751 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
752 uint64_t r600_begin_counter_spi(struct r600_common_screen *rscreen);
753 unsigned r600_end_counter_spi(struct r600_common_screen *rscreen, uint64_t begin);
754 uint64_t r600_begin_counter_gui(struct r600_common_screen *rscreen);
755 unsigned r600_end_counter_gui(struct r600_common_screen *rscreen, uint64_t begin);
756 
757 /* r600_perfcounters.c */
758 void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
759 
760 /* r600_query.c */
761 void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
762 void r600_query_init(struct r600_common_context *rctx);
763 void r600_suspend_queries(struct r600_common_context *ctx);
764 void r600_resume_queries(struct r600_common_context *ctx);
765 void r600_query_init_backend_mask(struct r600_common_context *ctx);
766 
767 /* r600_streamout.c */
768 void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
769 void r600_set_streamout_targets(struct pipe_context *ctx,
770 				unsigned num_targets,
771 				struct pipe_stream_output_target **targets,
772 				const unsigned *offset);
773 void r600_emit_streamout_end(struct r600_common_context *rctx);
774 void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
775 					     unsigned type, int diff);
776 void r600_streamout_init(struct r600_common_context *rctx);
777 
778 /* r600_test_dma.c */
779 void r600_test_dma(struct r600_common_screen *rscreen);
780 
781 /* r600_texture.c */
782 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
783 				struct r600_texture *rdst,
784 				unsigned dst_level, unsigned dstx,
785 				unsigned dsty, unsigned dstz,
786 				struct r600_texture *rsrc,
787 				unsigned src_level,
788 				const struct pipe_box *src_box);
789 void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
790 				 struct r600_texture *rtex,
791 				 unsigned nr_samples,
792 				 struct r600_fmask_info *out);
793 void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
794 				 struct r600_texture *rtex,
795 				 struct r600_cmask_info *out);
796 bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
797 				     struct pipe_resource *texture,
798 				     struct r600_texture **staging);
799 void r600_print_texture_info(struct r600_texture *rtex, FILE *f);
800 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
801 					const struct pipe_resource *templ);
802 bool vi_dcc_formats_compatible(enum pipe_format format1,
803 			       enum pipe_format format2);
804 void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx,
805 					   struct pipe_resource *tex,
806 					   unsigned level,
807 					   enum pipe_format view_format);
808 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
809 						struct pipe_resource *texture,
810 						const struct pipe_surface *templ,
811 						unsigned width, unsigned height);
812 unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
813 void vi_separate_dcc_start_query(struct pipe_context *ctx,
814 				 struct r600_texture *tex);
815 void vi_separate_dcc_stop_query(struct pipe_context *ctx,
816 				struct r600_texture *tex);
817 void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
818 					     struct r600_texture *tex);
819 void vi_dcc_clear_level(struct r600_common_context *rctx,
820 			struct r600_texture *rtex,
821 			unsigned level, unsigned clear_value);
822 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
823 				   struct pipe_framebuffer_state *fb,
824 				   struct r600_atom *fb_state,
825 				   unsigned *buffers, unsigned *dirty_cbufs,
826 				   const union pipe_color_union *color);
827 bool r600_texture_disable_dcc(struct r600_common_context *rctx,
828 			      struct r600_texture *rtex);
829 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
830 void r600_init_context_texture_functions(struct r600_common_context *rctx);
831 
832 /* r600_viewport.c */
833 void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
834 					    struct pipe_scissor_state *scissor);
835 void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
836 				 bool scissor_enable, bool clip_halfz);
837 void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
838 					  struct tgsi_shader_info *info);
839 void r600_init_viewport_functions(struct r600_common_context *rctx);
840 
841 /* cayman_msaa.c */
842 extern const uint32_t eg_sample_locs_2x[4];
843 extern const unsigned eg_max_dist_2x;
844 extern const uint32_t eg_sample_locs_4x[4];
845 extern const unsigned eg_max_dist_4x;
846 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
847 				unsigned sample_index, float *out_value);
848 void cayman_init_msaa(struct pipe_context *ctx);
849 void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
850 void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
851 			     int ps_iter_samples, int overrast_samples,
852 			     unsigned sc_mode_cntl_1);
853 
854 
855 /* Inline helpers. */
856 
/* Downcast a gallium pipe_resource to the driver's r600_resource wrapper. */
static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
	struct r600_resource *res = (struct r600_resource *)r;

	return res;
}
861 
/* Point *ptr at res, updating reference counts via the generic
 * pipe_resource_reference helper. */
static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
	struct pipe_resource **dst = (struct pipe_resource **)ptr;
	struct pipe_resource *src = (struct pipe_resource *)res;

	pipe_resource_reference(dst, src);
}
868 
869 static inline void
r600_texture_reference(struct r600_texture ** ptr,struct r600_texture * res)870 r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
871 {
872 	pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
873 }
874 
875 static inline void
r600_context_add_resource_size(struct pipe_context * ctx,struct pipe_resource * r)876 r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
877 {
878 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
879 	struct r600_resource *res = (struct r600_resource *)r;
880 
881 	if (res) {
882 		/* Add memory usage for need_gfx_cs_space */
883 		rctx->vram += res->vram_usage;
884 		rctx->gtt += res->gart_usage;
885 	}
886 }
887 
r600_get_strmout_en(struct r600_common_context * rctx)888 static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
889 {
890 	return rctx->streamout.streamout_enabled ||
891 	       rctx->streamout.prims_gen_query_enabled;
892 }
893 
894 #define     SQ_TEX_XY_FILTER_POINT                         0x00
895 #define     SQ_TEX_XY_FILTER_BILINEAR                      0x01
896 #define     SQ_TEX_XY_FILTER_ANISO_POINT                   0x02
897 #define     SQ_TEX_XY_FILTER_ANISO_BILINEAR                0x03
898 
eg_tex_filter(unsigned filter,unsigned max_aniso)899 static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
900 {
901 	if (filter == PIPE_TEX_FILTER_LINEAR)
902 		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
903 				     : SQ_TEX_XY_FILTER_BILINEAR;
904 	else
905 		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
906 				     : SQ_TEX_XY_FILTER_POINT;
907 }
908 
/* Convert a max-anisotropy degree into the hardware log2-style code:
 * <2 -> 0, <4 -> 1, <8 -> 2, <16 -> 3, >=16 -> 4. */
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
	static const unsigned limits[] = { 2, 4, 8, 16 };
	unsigned code;

	for (code = 0; code < 4; code++) {
		if (filter < limits[code])
			return code;
	}
	return 4;
}
921 
/* Return the hardware wavefront (SIMD) width in threads for a GPU family.
 * Only the listed low-end parts use narrower 16- or 32-thread wavefronts;
 * every other family runs 64-wide. */
static inline unsigned r600_wavefront_size(enum radeon_family family)
{
	switch (family) {
	/* 16-thread wavefronts. */
	case CHIP_RV610:
	case CHIP_RS780:
	case CHIP_RV620:
	case CHIP_RS880:
		return 16;
	/* 32-thread wavefronts. */
	case CHIP_RV630:
	case CHIP_RV635:
	case CHIP_RV730:
	case CHIP_RV710:
	case CHIP_PALM:
	case CHIP_CEDAR:
		return 32;
	default:
		return 64;
	}
}
941 
942 static inline enum radeon_bo_priority
r600_get_sampler_view_priority(struct r600_resource * res)943 r600_get_sampler_view_priority(struct r600_resource *res)
944 {
945 	if (res->b.b.target == PIPE_BUFFER)
946 		return RADEON_PRIO_SAMPLER_BUFFER;
947 
948 	if (res->b.b.nr_samples > 1)
949 		return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
950 
951 	return RADEON_PRIO_SAMPLER_TEXTURE;
952 }
953 
954 static inline bool
r600_can_sample_zs(struct r600_texture * tex,bool stencil_sampler)955 r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
956 {
957 	return (stencil_sampler && tex->can_sample_s) ||
958 	       (!stencil_sampler && tex->can_sample_z);
959 }
960 
/* Print compute-related debug output when the DBG_COMPUTE flag is set.
 * No semicolon after "while (0)": the do/while(0) wrapper exists precisely
 * so the macro acts as a single statement (callers supply the semicolon);
 * a trailing one here breaks uses in if/else bodies. */
#define COMPUTE_DBG(rscreen, fmt, args...) \
	do { \
		if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)
965 
966 #define R600_ERR(fmt, args...) \
967 	fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
968 
/* For MSAA sample positions: pack four (x, y) sample coordinates into one
 * 32-bit register value, 4 bits per coordinate — sample i lands in bits
 * [8i+3:8i] (x) and [8i+7:8i+4] (y). Cast s0x to unsigned like every other
 * operand so the whole expression is computed in unsigned arithmetic. */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
	((((unsigned)(s0x) & 0xf)) | (((unsigned)(s0y) & 0xf) << 4) |		   \
	(((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) |	   \
	(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |	   \
	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
975 
976 #endif
977