• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *     Alex Deucher <alexander.deucher@amd.com>
25  *
26  * ------------------------ This file is DEPRECATED! -------------------------
27  */
28 #include <drm/drmP.h>
29 #include <drm/radeon_drm.h>
30 #include "radeon_drv.h"
31 
32 #include "r600_blit_shaders.h"
33 
/* Draw-initiator values emitted by draw_auto(). */
#define DI_PT_RECTLIST		0x11	/* value written to VGT_PRIMITIVE_TYPE */
#define DI_INDEX_SIZE_16_BIT	0x0	/* operand of the INDEX_TYPE packet */
#define DI_SRC_SEL_AUTO_INDEX	0x2	/* final operand of DRAW_INDEX_AUTO */

/* Texture format codes passed to set_tex_resource(). */
#define FMT_8			0x1
#define FMT_5_6_5		0x8
#define FMT_8_8_8_8		0x1a

/* Colour-buffer format codes passed to set_render_target(). */
#define COLOR_8			0x1
#define COLOR_5_6_5		0x8
#define COLOR_8_8_8_8		0x1a
44 
45 static void
set_render_target(drm_radeon_private_t * dev_priv,int format,int w,int h,u64 gpu_addr)46 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
47 {
48 	u32 cb_color_info;
49 	int pitch, slice;
50 	RING_LOCALS;
51 	DRM_DEBUG("\n");
52 
53 	h = ALIGN(h, 8);
54 	if (h < 8)
55 		h = 8;
56 
57 	cb_color_info = ((format << 2) | (1 << 27));
58 	pitch = (w / 8) - 1;
59 	slice = ((w * h) / 64) - 1;
60 
61 	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
62 	    ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
63 		BEGIN_RING(21 + 2);
64 		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
65 		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
66 		OUT_RING(gpu_addr >> 8);
67 		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
68 		OUT_RING(2 << 0);
69 	} else {
70 		BEGIN_RING(21);
71 		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
72 		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
73 		OUT_RING(gpu_addr >> 8);
74 	}
75 
76 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
77 	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
78 	OUT_RING((pitch << 0) | (slice << 10));
79 
80 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
81 	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
82 	OUT_RING(0);
83 
84 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
85 	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
86 	OUT_RING(cb_color_info);
87 
88 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
89 	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
90 	OUT_RING(0);
91 
92 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
93 	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
94 	OUT_RING(0);
95 
96 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
97 	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
98 	OUT_RING(0);
99 
100 	ADVANCE_RING();
101 }
102 
103 static void
cp_set_surface_sync(drm_radeon_private_t * dev_priv,u32 sync_type,u32 size,u64 mc_addr)104 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
105 		    u32 sync_type, u32 size, u64 mc_addr)
106 {
107 	u32 cp_coher_size;
108 	RING_LOCALS;
109 	DRM_DEBUG("\n");
110 
111 	if (size == 0xffffffff)
112 		cp_coher_size = 0xffffffff;
113 	else
114 		cp_coher_size = ((size + 255) >> 8);
115 
116 	BEGIN_RING(5);
117 	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
118 	OUT_RING(sync_type);
119 	OUT_RING(cp_coher_size);
120 	OUT_RING((mc_addr >> 8));
121 	OUT_RING(10); /* poll interval */
122 	ADVANCE_RING();
123 }
124 
125 static void
set_shaders(struct drm_device * dev)126 set_shaders(struct drm_device *dev)
127 {
128 	drm_radeon_private_t *dev_priv = dev->dev_private;
129 	u64 gpu_addr;
130 	int i;
131 	u32 *vs, *ps;
132 	uint32_t sq_pgm_resources;
133 	RING_LOCALS;
134 	DRM_DEBUG("\n");
135 
136 	/* load shaders */
137 	vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
138 	ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
139 
140 	for (i = 0; i < r6xx_vs_size; i++)
141 		vs[i] = cpu_to_le32(r6xx_vs[i]);
142 	for (i = 0; i < r6xx_ps_size; i++)
143 		ps[i] = cpu_to_le32(r6xx_ps[i]);
144 
145 	dev_priv->blit_vb->used = 512;
146 
147 	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
148 
149 	/* setup shader regs */
150 	sq_pgm_resources = (1 << 0);
151 
152 	BEGIN_RING(9 + 12);
153 	/* VS */
154 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
155 	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
156 	OUT_RING(gpu_addr >> 8);
157 
158 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
159 	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
160 	OUT_RING(sq_pgm_resources);
161 
162 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
163 	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
164 	OUT_RING(0);
165 
166 	/* PS */
167 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
168 	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
169 	OUT_RING((gpu_addr + 256) >> 8);
170 
171 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
172 	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
173 	OUT_RING(sq_pgm_resources | (1 << 28));
174 
175 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
176 	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
177 	OUT_RING(2);
178 
179 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
180 	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
181 	OUT_RING(0);
182 	ADVANCE_RING();
183 
184 	cp_set_surface_sync(dev_priv,
185 			    R600_SH_ACTION_ENA, 512, gpu_addr);
186 }
187 
188 static void
set_vtx_resource(drm_radeon_private_t * dev_priv,u64 gpu_addr)189 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
190 {
191 	uint32_t sq_vtx_constant_word2;
192 	RING_LOCALS;
193 	DRM_DEBUG("\n");
194 
195 	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
196 #ifdef __BIG_ENDIAN
197 	sq_vtx_constant_word2 |= (2 << 30);
198 #endif
199 
200 	BEGIN_RING(9);
201 	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
202 	OUT_RING(0x460);
203 	OUT_RING(gpu_addr & 0xffffffff);
204 	OUT_RING(48 - 1);
205 	OUT_RING(sq_vtx_constant_word2);
206 	OUT_RING(1 << 0);
207 	OUT_RING(0);
208 	OUT_RING(0);
209 	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
210 	ADVANCE_RING();
211 
212 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
213 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
214 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
215 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
216 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
217 		cp_set_surface_sync(dev_priv,
218 				    R600_TC_ACTION_ENA, 48, gpu_addr);
219 	else
220 		cp_set_surface_sync(dev_priv,
221 				    R600_VC_ACTION_ENA, 48, gpu_addr);
222 }
223 
224 static void
set_tex_resource(drm_radeon_private_t * dev_priv,int format,int w,int h,int pitch,u64 gpu_addr)225 set_tex_resource(drm_radeon_private_t *dev_priv,
226 		 int format, int w, int h, int pitch, u64 gpu_addr)
227 {
228 	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
229 	RING_LOCALS;
230 	DRM_DEBUG("\n");
231 
232 	if (h < 1)
233 		h = 1;
234 
235 	sq_tex_resource_word0 = (1 << 0);
236 	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
237 				  ((w - 1) << 19));
238 
239 	sq_tex_resource_word1 = (format << 26);
240 	sq_tex_resource_word1 |= ((h - 1) << 0);
241 
242 	sq_tex_resource_word4 = ((1 << 14) |
243 				 (0 << 16) |
244 				 (1 << 19) |
245 				 (2 << 22) |
246 				 (3 << 25));
247 
248 	BEGIN_RING(9);
249 	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
250 	OUT_RING(0);
251 	OUT_RING(sq_tex_resource_word0);
252 	OUT_RING(sq_tex_resource_word1);
253 	OUT_RING(gpu_addr >> 8);
254 	OUT_RING(gpu_addr >> 8);
255 	OUT_RING(sq_tex_resource_word4);
256 	OUT_RING(0);
257 	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
258 	ADVANCE_RING();
259 
260 }
261 
262 static void
set_scissors(drm_radeon_private_t * dev_priv,int x1,int y1,int x2,int y2)263 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
264 {
265 	RING_LOCALS;
266 	DRM_DEBUG("\n");
267 
268 	BEGIN_RING(12);
269 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
270 	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
271 	OUT_RING((x1 << 0) | (y1 << 16));
272 	OUT_RING((x2 << 0) | (y2 << 16));
273 
274 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
275 	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
276 	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
277 	OUT_RING((x2 << 0) | (y2 << 16));
278 
279 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
280 	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
281 	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
282 	OUT_RING((x2 << 0) | (y2 << 16));
283 	ADVANCE_RING();
284 }
285 
286 static void
draw_auto(drm_radeon_private_t * dev_priv)287 draw_auto(drm_radeon_private_t *dev_priv)
288 {
289 	RING_LOCALS;
290 	DRM_DEBUG("\n");
291 
292 	BEGIN_RING(10);
293 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
294 	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
295 	OUT_RING(DI_PT_RECTLIST);
296 
297 	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
298 #ifdef __BIG_ENDIAN
299 	OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
300 #else
301 	OUT_RING(DI_INDEX_SIZE_16_BIT);
302 #endif
303 
304 	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
305 	OUT_RING(1);
306 
307 	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
308 	OUT_RING(3);
309 	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
310 
311 	ADVANCE_RING();
312 	COMMIT_RING();
313 }
314 
315 static void
set_default_state(drm_radeon_private_t * dev_priv)316 set_default_state(drm_radeon_private_t *dev_priv)
317 {
318 	int i;
319 	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
320 	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
321 	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
322 	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
323 	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
324 	RING_LOCALS;
325 
326 	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
327 	case CHIP_R600:
328 		num_ps_gprs = 192;
329 		num_vs_gprs = 56;
330 		num_temp_gprs = 4;
331 		num_gs_gprs = 0;
332 		num_es_gprs = 0;
333 		num_ps_threads = 136;
334 		num_vs_threads = 48;
335 		num_gs_threads = 4;
336 		num_es_threads = 4;
337 		num_ps_stack_entries = 128;
338 		num_vs_stack_entries = 128;
339 		num_gs_stack_entries = 0;
340 		num_es_stack_entries = 0;
341 		break;
342 	case CHIP_RV630:
343 	case CHIP_RV635:
344 		num_ps_gprs = 84;
345 		num_vs_gprs = 36;
346 		num_temp_gprs = 4;
347 		num_gs_gprs = 0;
348 		num_es_gprs = 0;
349 		num_ps_threads = 144;
350 		num_vs_threads = 40;
351 		num_gs_threads = 4;
352 		num_es_threads = 4;
353 		num_ps_stack_entries = 40;
354 		num_vs_stack_entries = 40;
355 		num_gs_stack_entries = 32;
356 		num_es_stack_entries = 16;
357 		break;
358 	case CHIP_RV610:
359 	case CHIP_RV620:
360 	case CHIP_RS780:
361 	case CHIP_RS880:
362 	default:
363 		num_ps_gprs = 84;
364 		num_vs_gprs = 36;
365 		num_temp_gprs = 4;
366 		num_gs_gprs = 0;
367 		num_es_gprs = 0;
368 		num_ps_threads = 136;
369 		num_vs_threads = 48;
370 		num_gs_threads = 4;
371 		num_es_threads = 4;
372 		num_ps_stack_entries = 40;
373 		num_vs_stack_entries = 40;
374 		num_gs_stack_entries = 32;
375 		num_es_stack_entries = 16;
376 		break;
377 	case CHIP_RV670:
378 		num_ps_gprs = 144;
379 		num_vs_gprs = 40;
380 		num_temp_gprs = 4;
381 		num_gs_gprs = 0;
382 		num_es_gprs = 0;
383 		num_ps_threads = 136;
384 		num_vs_threads = 48;
385 		num_gs_threads = 4;
386 		num_es_threads = 4;
387 		num_ps_stack_entries = 40;
388 		num_vs_stack_entries = 40;
389 		num_gs_stack_entries = 32;
390 		num_es_stack_entries = 16;
391 		break;
392 	case CHIP_RV770:
393 		num_ps_gprs = 192;
394 		num_vs_gprs = 56;
395 		num_temp_gprs = 4;
396 		num_gs_gprs = 0;
397 		num_es_gprs = 0;
398 		num_ps_threads = 188;
399 		num_vs_threads = 60;
400 		num_gs_threads = 0;
401 		num_es_threads = 0;
402 		num_ps_stack_entries = 256;
403 		num_vs_stack_entries = 256;
404 		num_gs_stack_entries = 0;
405 		num_es_stack_entries = 0;
406 		break;
407 	case CHIP_RV730:
408 	case CHIP_RV740:
409 		num_ps_gprs = 84;
410 		num_vs_gprs = 36;
411 		num_temp_gprs = 4;
412 		num_gs_gprs = 0;
413 		num_es_gprs = 0;
414 		num_ps_threads = 188;
415 		num_vs_threads = 60;
416 		num_gs_threads = 0;
417 		num_es_threads = 0;
418 		num_ps_stack_entries = 128;
419 		num_vs_stack_entries = 128;
420 		num_gs_stack_entries = 0;
421 		num_es_stack_entries = 0;
422 		break;
423 	case CHIP_RV710:
424 		num_ps_gprs = 192;
425 		num_vs_gprs = 56;
426 		num_temp_gprs = 4;
427 		num_gs_gprs = 0;
428 		num_es_gprs = 0;
429 		num_ps_threads = 144;
430 		num_vs_threads = 48;
431 		num_gs_threads = 0;
432 		num_es_threads = 0;
433 		num_ps_stack_entries = 128;
434 		num_vs_stack_entries = 128;
435 		num_gs_stack_entries = 0;
436 		num_es_stack_entries = 0;
437 		break;
438 	}
439 
440 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
441 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
442 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
443 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
444 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
445 		sq_config = 0;
446 	else
447 		sq_config = R600_VC_ENABLE;
448 
449 	sq_config |= (R600_DX9_CONSTS |
450 		      R600_ALU_INST_PREFER_VECTOR |
451 		      R600_PS_PRIO(0) |
452 		      R600_VS_PRIO(1) |
453 		      R600_GS_PRIO(2) |
454 		      R600_ES_PRIO(3));
455 
456 	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
457 				  R600_NUM_VS_GPRS(num_vs_gprs) |
458 				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
459 	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
460 				  R600_NUM_ES_GPRS(num_es_gprs));
461 	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
462 				   R600_NUM_VS_THREADS(num_vs_threads) |
463 				   R600_NUM_GS_THREADS(num_gs_threads) |
464 				   R600_NUM_ES_THREADS(num_es_threads));
465 	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
466 				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
467 	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
468 				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
469 
470 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
471 		BEGIN_RING(r7xx_default_size + 10);
472 		for (i = 0; i < r7xx_default_size; i++)
473 			OUT_RING(r7xx_default_state[i]);
474 	} else {
475 		BEGIN_RING(r6xx_default_size + 10);
476 		for (i = 0; i < r6xx_default_size; i++)
477 			OUT_RING(r6xx_default_state[i]);
478 	}
479 	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
480 	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
481 	/* SQ config */
482 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
483 	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
484 	OUT_RING(sq_config);
485 	OUT_RING(sq_gpr_resource_mgmt_1);
486 	OUT_RING(sq_gpr_resource_mgmt_2);
487 	OUT_RING(sq_thread_resource_mgmt);
488 	OUT_RING(sq_stack_resource_mgmt_1);
489 	OUT_RING(sq_stack_resource_mgmt_2);
490 	ADVANCE_RING();
491 }
492 
r600_nomm_get_vb(struct drm_device * dev)493 static int r600_nomm_get_vb(struct drm_device *dev)
494 {
495 	drm_radeon_private_t *dev_priv = dev->dev_private;
496 	dev_priv->blit_vb = radeon_freelist_get(dev);
497 	if (!dev_priv->blit_vb) {
498 		DRM_ERROR("Unable to allocate vertex buffer for blit\n");
499 		return -EAGAIN;
500 	}
501 	return 0;
502 }
503 
r600_nomm_put_vb(struct drm_device * dev)504 static void r600_nomm_put_vb(struct drm_device *dev)
505 {
506 	drm_radeon_private_t *dev_priv = dev->dev_private;
507 
508 	dev_priv->blit_vb->used = 0;
509 	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
510 }
511 
r600_nomm_get_vb_ptr(struct drm_device * dev)512 static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
513 {
514 	drm_radeon_private_t *dev_priv = dev->dev_private;
515 	return (((char *)dev->agp_buffer_map->handle +
516 		 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
517 }
518 
519 int
r600_prepare_blit_copy(struct drm_device * dev,struct drm_file * file_priv)520 r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
521 {
522 	drm_radeon_private_t *dev_priv = dev->dev_private;
523 	int ret;
524 	DRM_DEBUG("\n");
525 
526 	ret = r600_nomm_get_vb(dev);
527 	if (ret)
528 		return ret;
529 
530 	dev_priv->blit_vb->file_priv = file_priv;
531 
532 	set_default_state(dev_priv);
533 	set_shaders(dev);
534 
535 	return 0;
536 }
537 
538 
539 void
r600_done_blit_copy(struct drm_device * dev)540 r600_done_blit_copy(struct drm_device *dev)
541 {
542 	drm_radeon_private_t *dev_priv = dev->dev_private;
543 	RING_LOCALS;
544 	DRM_DEBUG("\n");
545 
546 	BEGIN_RING(5);
547 	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
548 	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
549 	/* wait for 3D idle clean */
550 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
551 	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
552 	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
553 
554 	ADVANCE_RING();
555 	COMMIT_RING();
556 
557 	r600_nomm_put_vb(dev);
558 }
559 
560 void
r600_blit_copy(struct drm_device * dev,uint64_t src_gpu_addr,uint64_t dst_gpu_addr,int size_bytes)561 r600_blit_copy(struct drm_device *dev,
562 	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
563 	       int size_bytes)
564 {
565 	drm_radeon_private_t *dev_priv = dev->dev_private;
566 	int max_bytes;
567 	u64 vb_addr;
568 	u32 *vb;
569 
570 	vb = r600_nomm_get_vb_ptr(dev);
571 
572 	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
573 		max_bytes = 8192;
574 
575 		while (size_bytes) {
576 			int cur_size = size_bytes;
577 			int src_x = src_gpu_addr & 255;
578 			int dst_x = dst_gpu_addr & 255;
579 			int h = 1;
580 			src_gpu_addr = src_gpu_addr & ~255;
581 			dst_gpu_addr = dst_gpu_addr & ~255;
582 
583 			if (!src_x && !dst_x) {
584 				h = (cur_size / max_bytes);
585 				if (h > 8192)
586 					h = 8192;
587 				if (h == 0)
588 					h = 1;
589 				else
590 					cur_size = max_bytes;
591 			} else {
592 				if (cur_size > max_bytes)
593 					cur_size = max_bytes;
594 				if (cur_size > (max_bytes - dst_x))
595 					cur_size = (max_bytes - dst_x);
596 				if (cur_size > (max_bytes - src_x))
597 					cur_size = (max_bytes - src_x);
598 			}
599 
600 			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
601 
602 				r600_nomm_put_vb(dev);
603 				r600_nomm_get_vb(dev);
604 				if (!dev_priv->blit_vb)
605 					return;
606 				set_shaders(dev);
607 				vb = r600_nomm_get_vb_ptr(dev);
608 			}
609 
610 			vb[0] = int2float(dst_x);
611 			vb[1] = 0;
612 			vb[2] = int2float(src_x);
613 			vb[3] = 0;
614 
615 			vb[4] = int2float(dst_x);
616 			vb[5] = int2float(h);
617 			vb[6] = int2float(src_x);
618 			vb[7] = int2float(h);
619 
620 			vb[8] = int2float(dst_x + cur_size);
621 			vb[9] = int2float(h);
622 			vb[10] = int2float(src_x + cur_size);
623 			vb[11] = int2float(h);
624 
625 			/* src */
626 			set_tex_resource(dev_priv, FMT_8,
627 					 src_x + cur_size, h, src_x + cur_size,
628 					 src_gpu_addr);
629 
630 			cp_set_surface_sync(dev_priv,
631 					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
632 
633 			/* dst */
634 			set_render_target(dev_priv, COLOR_8,
635 					  dst_x + cur_size, h,
636 					  dst_gpu_addr);
637 
638 			/* scissors */
639 			set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
640 
641 			/* Vertex buffer setup */
642 			vb_addr = dev_priv->gart_buffers_offset +
643 				dev_priv->blit_vb->offset +
644 				dev_priv->blit_vb->used;
645 			set_vtx_resource(dev_priv, vb_addr);
646 
647 			/* draw */
648 			draw_auto(dev_priv);
649 
650 			cp_set_surface_sync(dev_priv,
651 					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
652 					    cur_size * h, dst_gpu_addr);
653 
654 			vb += 12;
655 			dev_priv->blit_vb->used += 12 * 4;
656 
657 			src_gpu_addr += cur_size * h;
658 			dst_gpu_addr += cur_size * h;
659 			size_bytes -= cur_size * h;
660 		}
661 	} else {
662 		max_bytes = 8192 * 4;
663 
664 		while (size_bytes) {
665 			int cur_size = size_bytes;
666 			int src_x = (src_gpu_addr & 255);
667 			int dst_x = (dst_gpu_addr & 255);
668 			int h = 1;
669 			src_gpu_addr = src_gpu_addr & ~255;
670 			dst_gpu_addr = dst_gpu_addr & ~255;
671 
672 			if (!src_x && !dst_x) {
673 				h = (cur_size / max_bytes);
674 				if (h > 8192)
675 					h = 8192;
676 				if (h == 0)
677 					h = 1;
678 				else
679 					cur_size = max_bytes;
680 			} else {
681 				if (cur_size > max_bytes)
682 					cur_size = max_bytes;
683 				if (cur_size > (max_bytes - dst_x))
684 					cur_size = (max_bytes - dst_x);
685 				if (cur_size > (max_bytes - src_x))
686 					cur_size = (max_bytes - src_x);
687 			}
688 
689 			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
690 				r600_nomm_put_vb(dev);
691 				r600_nomm_get_vb(dev);
692 				if (!dev_priv->blit_vb)
693 					return;
694 
695 				set_shaders(dev);
696 				vb = r600_nomm_get_vb_ptr(dev);
697 			}
698 
699 			vb[0] = int2float(dst_x / 4);
700 			vb[1] = 0;
701 			vb[2] = int2float(src_x / 4);
702 			vb[3] = 0;
703 
704 			vb[4] = int2float(dst_x / 4);
705 			vb[5] = int2float(h);
706 			vb[6] = int2float(src_x / 4);
707 			vb[7] = int2float(h);
708 
709 			vb[8] = int2float((dst_x + cur_size) / 4);
710 			vb[9] = int2float(h);
711 			vb[10] = int2float((src_x + cur_size) / 4);
712 			vb[11] = int2float(h);
713 
714 			/* src */
715 			set_tex_resource(dev_priv, FMT_8_8_8_8,
716 					 (src_x + cur_size) / 4,
717 					 h, (src_x + cur_size) / 4,
718 					 src_gpu_addr);
719 
720 			cp_set_surface_sync(dev_priv,
721 					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
722 
723 			/* dst */
724 			set_render_target(dev_priv, COLOR_8_8_8_8,
725 					  (dst_x + cur_size) / 4, h,
726 					  dst_gpu_addr);
727 
728 			/* scissors */
729 			set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
730 
731 			/* Vertex buffer setup */
732 			vb_addr = dev_priv->gart_buffers_offset +
733 				dev_priv->blit_vb->offset +
734 				dev_priv->blit_vb->used;
735 			set_vtx_resource(dev_priv, vb_addr);
736 
737 			/* draw */
738 			draw_auto(dev_priv);
739 
740 			cp_set_surface_sync(dev_priv,
741 					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
742 					    cur_size * h, dst_gpu_addr);
743 
744 			vb += 12;
745 			dev_priv->blit_vb->used += 12 * 4;
746 
747 			src_gpu_addr += cur_size * h;
748 			dst_gpu_addr += cur_size * h;
749 			size_bytes -= cur_size * h;
750 		}
751 	}
752 }
753 
754 void
r600_blit_swap(struct drm_device * dev,uint64_t src_gpu_addr,uint64_t dst_gpu_addr,int sx,int sy,int dx,int dy,int w,int h,int src_pitch,int dst_pitch,int cpp)755 r600_blit_swap(struct drm_device *dev,
756 	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
757 	       int sx, int sy, int dx, int dy,
758 	       int w, int h, int src_pitch, int dst_pitch, int cpp)
759 {
760 	drm_radeon_private_t *dev_priv = dev->dev_private;
761 	int cb_format, tex_format;
762 	int sx2, sy2, dx2, dy2;
763 	u64 vb_addr;
764 	u32 *vb;
765 
766 	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
767 
768 		r600_nomm_put_vb(dev);
769 		r600_nomm_get_vb(dev);
770 		if (!dev_priv->blit_vb)
771 			return;
772 
773 		set_shaders(dev);
774 	}
775 	vb = r600_nomm_get_vb_ptr(dev);
776 
777 	sx2 = sx + w;
778 	sy2 = sy + h;
779 	dx2 = dx + w;
780 	dy2 = dy + h;
781 
782 	vb[0] = int2float(dx);
783 	vb[1] = int2float(dy);
784 	vb[2] = int2float(sx);
785 	vb[3] = int2float(sy);
786 
787 	vb[4] = int2float(dx);
788 	vb[5] = int2float(dy2);
789 	vb[6] = int2float(sx);
790 	vb[7] = int2float(sy2);
791 
792 	vb[8] = int2float(dx2);
793 	vb[9] = int2float(dy2);
794 	vb[10] = int2float(sx2);
795 	vb[11] = int2float(sy2);
796 
797 	switch(cpp) {
798 	case 4:
799 		cb_format = COLOR_8_8_8_8;
800 		tex_format = FMT_8_8_8_8;
801 		break;
802 	case 2:
803 		cb_format = COLOR_5_6_5;
804 		tex_format = FMT_5_6_5;
805 		break;
806 	default:
807 		cb_format = COLOR_8;
808 		tex_format = FMT_8;
809 		break;
810 	}
811 
812 	/* src */
813 	set_tex_resource(dev_priv, tex_format,
814 			 src_pitch / cpp,
815 			 sy2, src_pitch / cpp,
816 			 src_gpu_addr);
817 
818 	cp_set_surface_sync(dev_priv,
819 			    R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
820 
821 	/* dst */
822 	set_render_target(dev_priv, cb_format,
823 			  dst_pitch / cpp, dy2,
824 			  dst_gpu_addr);
825 
826 	/* scissors */
827 	set_scissors(dev_priv, dx, dy, dx2, dy2);
828 
829 	/* Vertex buffer setup */
830 	vb_addr = dev_priv->gart_buffers_offset +
831 		dev_priv->blit_vb->offset +
832 		dev_priv->blit_vb->used;
833 	set_vtx_resource(dev_priv, vb_addr);
834 
835 	/* draw */
836 	draw_auto(dev_priv);
837 
838 	cp_set_surface_sync(dev_priv,
839 			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
840 			    dst_pitch * dy2, dst_gpu_addr);
841 
842 	dev_priv->blit_vb->used += 12 * 4;
843 }
844