/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"

/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    struct drm_file *file_priv,
						    u32 *offset)
{
	u64 off = *offset;
	u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */

	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if (radeon_check_offset(dev_priv, off))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = file_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > fb_end)
		off = off - fb_end - 1 + dev_priv->gart_vm_start;

	/* Now recheck and fail if out of bounds */
	if (radeon_check_offset(dev_priv, off)) {
		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
		*offset = off;
		return 0;
	}
	return -EINVAL;
}
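
/* A worked example of the fixup above (the numbers are illustrative
 * assumptions, not values read from real hardware): with
 * fb_location = 0xe0000000 and fb_size = 0x08000000, fb_end is
 * 0xe7ffffff.  A zero-based client offset of 0x00100000 misses both
 * apertures, is below fb_size + gart_size, and so has radeon_fb_delta
 * (0xe0000000 here) added, giving 0xe0100000 inside the framebuffer.
 * A client that assumed the GART aperture followed the framebuffer and
 * passed 0xe8000000, while the GART really starts at
 * gart_vm_start = 0xf0000000, is caught by the off > fb_end test and
 * rebased to 0xe8000000 - 0xe7ffffff - 1 + 0xf0000000 = 0xf0000000.
 */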

static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return -EINVAL;
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return -EINVAL;
				}
			}
		}
		break;

	case R200_EMIT_VAP_CTL:{
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return -EINVAL;
	}

	return 0;
}
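
/* The data[] indexing above follows from the layout of state packets:
 * each one is a contiguous block of 32-bit registers starting at a base
 * register, so the dword holding a given register sits at
 * (REG - BASE) / 4.  For RADEON_EMIT_PP_MISC, for example, the depth
 * buffer offset lives at
 * data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4], matching the
 * seven-register PP_MISC block that radeon_emit_state() programs below.
 */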

static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	u32 offset, narrays;
	int count, i, k;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return -EINVAL;
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return -EINVAL;
	}

	switch (cmd[0] & 0xff00) {
	/* XXX Are there old drivers needing other packets? */

	case RADEON_3D_DRAW_IMMD:
	case RADEON_3D_DRAW_VBUF:
	case RADEON_3D_DRAW_INDX:
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
	case RADEON_3D_CLEAR_ZMASK:
/*	case RADEON_CP_NEXT_CHAR:
	case RADEON_CP_PLY_NEXTSCAN:
	case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
		/* these packets are safe */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
	case RADEON_CP_3D_DRAW_VBUF_2:
	case RADEON_CP_3D_DRAW_INDX_2:
	case RADEON_3D_CLEAR_HIZ:
		/* safe but r200 only */
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}
		break;

	case RADEON_3D_LOAD_VBPNTR:
		count = (cmd[0] >> 16) & 0x3fff;

		if (count > 18) { /* 12 arrays max */
			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
				  count);
			return -EINVAL;
		}

		/* carefully check packet contents */
		narrays = cmd[1] & ~0xc000;
		k = 0;
		i = 2;
		while ((k < narrays) && (i < (count + 2))) {
			i++;		/* skip attribute field */
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
			if (k == narrays)
				break;
			/* have one more to process, they come in pairs */
			if (radeon_check_and_fixup_offset(dev_priv,
							  file_priv, &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
		}
		/* do the counts match what we expect ? */
		if ((k != narrays) || (i != (count + 2))) {
			DRM_ERROR
			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
			      k, i, narrays, count + 1);
			return -EINVAL;
		}
		break;

	case RADEON_3D_RNDR_GEN_INDX_PRIM:
		if (dev_priv->microcode_version != UCODE_R100) {
			DRM_ERROR("Invalid 3d packet for r200-class chip\n");
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
			DRM_ERROR("Invalid rndr_gen_indx offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_CP_INDX_BUFFER:
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}
		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
			return -EINVAL;
		}
		break;

	case RADEON_CNTL_HOSTDATA_BLT:
	case RADEON_CNTL_PAINT_MULTI:
	case RADEON_CNTL_BITBLT_MULTI:
		/* MSB of opcode: next DWORD GUI_CNTL */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return -EINVAL;
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return -EINVAL;
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
		break;

	default:
		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
		return -EINVAL;
	}

	return 0;
}
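
/* For reference, the type-3 header decoded above (bit positions follow
 * from the masks used in the code): bits 31:30 carry the packet type
 * (0xc0000000 == RADEON_CP_PACKET3), bits 29:16 the payload dword count
 * minus one (RADEON_CP_PACKET_COUNT_MASK >> 16), and bits 15:8 the
 * opcode the switch dispatches on.  A header whose count field is 4
 * thus gives *cmdsz = 2 + 4 = 6: one header dword plus five payload
 * dwords, which is what the bufsz check validates against.
 */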

/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     struct drm_clip_rect * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}

/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     struct drm_file *file_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}

		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}

/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      struct drm_file *file_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, file_priv, &state->context,
				 state->tex, state->dirty);
}

/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
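
/* A sketch of how this table is consumed (inferred from its shape, not
 * shown in this excerpt): a 1.3 command stream names a state block by
 * its index into packet[], and the emit path writes len consecutive
 * registers starting at start.  Entry 0, {RADEON_PP_MISC, 7, ...},
 * covers the same seven-register run that radeon_emit_state() programs
 * with CP_PACKET0(RADEON_PP_MISC, 6) above (a packet0 count of N
 * meaning N + 1 registers).
 */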

/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     struct drm_radeon_master_private *master_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += master_priv->sarea_priv->boxes[0].x1;
	y += master_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (master_priv->sarea_priv->pfCurrentPage == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
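
/* The RGB565 packing above keeps the top bits of each 8-bit channel:
 * five bits of red, six of green, five of blue.  As a quick check, the
 * purple (r = 255, g = 0, b = 255) used for the flip box below packs to
 * ((0xf8 << 8) | (0xf8 >> 3)) = 0xf81f.
 */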

static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, master_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}

/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(struct drm_device * dev,
				     struct drm_master *master,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (sarea_priv->pfCurrentPage == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
		    (dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though we get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & RADEON_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though we would still need the clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv, master_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(9);

		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
		if (sarea_priv->pfCurrentPage == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
	int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("pfCurrentPage=%d\n",
		  master_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv, master_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	master_priv->sarea_priv->last_frame++;
	master_priv->sarea_priv->pfCurrentPage =
		1 - master_priv->sarea_priv->pfCurrentPage;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}

static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}
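
/* Quick examples of the check above: a RADEON_PRIM_TYPE_LINE list
 * needs an even, non-zero vertex count, so nr = 4 passes and nr = 5
 * fails; a RADEON_PRIM_TYPE_TRI_LIST needs a non-zero multiple of
 * three, so nr = 6 passes and nr = 7 fails.  Unknown primitive types
 * are always rejected.
 */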

typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;

static void radeon_cp_dispatch_vertex(struct drm_device * dev,
				      struct drm_file *file_priv,
				      struct drm_buf * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}

static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

static void radeon_cp_dispatch_indirect(struct drm_device * dev,
					struct drm_buf * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
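
/* The dword arithmetic above, worked through: (end - start + 3) /
 * sizeof(u32) rounds the byte range up to whole 32-bit words, so an
 * assumed range of start = 0, end = 10 gives (10 + 3) / 4 = 3 dwords.
 * Three is odd, so a RADEON_CP_PACKET2 nop is appended to make the
 * count 4 before the indirect buffer is fired.
 */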

static void radeon_cp_dispatch_indices(struct drm_device *dev,
				       struct drm_master *master,
				       struct drm_buf * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

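	/* Write the RNDR_GEN_INDX_PRIM header directly into the element
	 * buffer, in front of the indices themselves; the whole buffer is
	 * then fired as an indirect buffer once per cliprect below.
	 */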
	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}

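/* Texture uploads are staged through DMA buffers, so a single pass can
 * move at most one buffer's worth of data; larger images are split into
 * multiple blits by the loop in radeon_cp_dispatch_texture() below.
 */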
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture(struct drm_device * dev,
				      struct drm_file *file_priv,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_buf *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset, byte_offset;
	RING_LOCALS;

	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return -EINVAL;
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return -EINVAL;
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return -EINVAL;

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	/* this might fail for zero-sized uploads - are those illegal? */
	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
				blit_width - 1)) {
		DRM_ERROR("Invalid final destination offset\n");
		return -EINVAL;
	}

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("EAGAIN\n");
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return -EFAULT;
			return -EAGAIN;
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return -EFAULT; \
		} \
	} while(0)

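		/* Note: the return inside RADEON_COPY_MT exits
		 * radeon_cp_dispatch_texture() itself, so a faulting user
		 * copy aborts the whole upload.
		 */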
		if (microtile) {
			/* Texture micro tiling is in use, so the minimum
			   texture width is 16 bytes.  However, we cannot use
			   the blitter directly for texture widths < 64 bytes,
			   since the minimum texture pitch is 64 bytes and it
			   must match the texture width, otherwise the blitter
			   will tile the data incorrectly.  Thus we tile
			   manually in that case.  Additionally, we need to
			   special-case a texture height of 1, since the
			   actual image will have height 2 and we must not
			   read beyond the texture size from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
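				/* For a 32-byte-wide microtiled texture, each
				 * pair of source rows is interleaved in the
				 * buffer: row i's two 16-byte halves land at
				 * dword offsets 0 and 8, row i+1's at 4 and
				 * 12, matching the microtile layout implied
				 * by the copy offsets below.
				 */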
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		byte_offset = (image->y & ~2047) * blit_width;
		buf->file_priv = file_priv;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
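		/* Blit the staged data into place: one BITBLT_MULTI packet
		 * carries the GMC control word, the source and destination
		 * pitch/offset pairs, and the blit rectangle.  The y
		 * coordinate is split into a 2048-line-aligned byte offset
		 * plus a remainder, apparently because the blitter's y field
		 * only covers 2048 lines.
		 */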
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
		OUT_RING(0);
		OUT_RING((image->x << 16) | (image->y % 2048));
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		COMMIT_RING();

		radeon_cp_discard_buffer(dev, file_priv->master, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	COMMIT_RING();

	return 0;
}

static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

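	/* 35 ring dwords: a 2-dword write resetting the stipple address,
	 * plus a 1-dword packet header and the 32-dword stipple pattern.
	 */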
	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}

static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

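	/* Each hardware surface has its register triple (flags, lower
	 * bound, upper bound) spaced 16 bytes apart, hence the
	 * 16 * surf_index stride from the SURFACE0 registers.
	 */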
	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}

/* Allocates a virtual surface.
 * This doesn't always allocate a real hardware surface; it will stretch
 * an existing surface when possible.
 *
 * Note that refcount can be at most 2: with refcount 3, a free could
 * split the surface, forcing us to allocate a new one, and that might
 * not always be possible.
 * For example: we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv,
			 struct drm_file *file_priv)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

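	/* There are twice as many virtual surface slots as hardware
	 * surfaces, since two virtual allocations may share one hardware
	 * surface (refcount <= 2, see above).
	 */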
	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].file_priv == NULL)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}

static int free_surface(struct drm_file *file_priv,
			drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->file_priv) {
			if ((lower == s->lower) && (file_priv == s->file_priv)) {
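				/* Shrink the hardware surface away from the
				 * freed range: if this virtual surface sat
				 * at the lower edge, move the lower bound
				 * up, and likewise for the upper edge; once
				 * the refcount drops to zero the surface is
				 * disabled by clearing its flags.
				 */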
				if (dev_priv->surfaces[s->surface_index].lower ==
				    s->lower)
					dev_priv->surfaces[s->surface_index].lower =
					    s->upper;

				if (dev_priv->surfaces[s->surface_index].upper ==
				    s->upper)
					dev_priv->surfaces[s->surface_index].upper =
					    s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].refcount
				    == 0)
					dev_priv->surfaces[s->surface_index].flags
					    = 0;
				s->file_priv = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

static void radeon_surfaces_release(struct drm_file *file_priv,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].file_priv == file_priv)
			free_surface(file_priv, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}

/* ================================================================
 * IOCTL functions
 */
static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t *alloc = data;

	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
		return -EINVAL;
	else
		return 0;
}

static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t *memfree = data;

	if (free_surface(file_priv, dev_priv, memfree->address))
		return -EINVAL;
	else
		return 0;
}

static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_clear_t *clear = data;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return -EFAULT;

	radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);

	COMMIT_RING();
	return 0;
}

/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	RING_LOCALS;

	DRM_DEBUG("\n");

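	/* Set the flip control bit on both CRTC offset registers, so page
	 * flipping works regardless of which head the 3D window is on.
	 */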
	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;

	if (master_priv->sarea_priv->pfCurrentPage != 1)
		master_priv->sarea_priv->pfCurrentPage = 0;

	return 0;
}

/* Swapping and flipping are different operations; they need different
 * ioctls.  They can and should be intermixed to support multiple
 * 3d windows.
 */
static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev, file_priv->master);

	radeon_cp_dispatch_flip(dev, file_priv->master);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev, file_priv->master);
	sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex_t *vertex = data;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	/* Build up a prim_t record:
	 */
	if (vertex->count) {
		buf->used = vertex->count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, file_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return -EINVAL;
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex->count;	/* unused */
		prim.prim = vertex->prim;
		prim.numverts = vertex->count;
		prim.vc_format = sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
	}

	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indices_t *elts = data;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
		  elts->discard);

	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts->idx, dma->buf_count - 1);
		return -EINVAL;
	}
	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts->idx);
		return -EINVAL;
	}

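	/* Back the start up by RADEON_INDEX_PRIM_OFFSET bytes: that space
	 * in front of the indices is where radeon_cp_dispatch_indices()
	 * writes the primitive header in place.
	 */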
	count = (elts->end - elts->start) / sizeof(u16);
	elts->start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts->start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
		return -EINVAL;
	}
	if (elts->start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
		return -EINVAL;
	}

	buf->used = elts->end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, file_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return -EINVAL;
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts->start;
	prim.finish = elts->end;
	prim.prim = elts->prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
	if (elts->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t *tex = data;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (tex->image == NULL) {
		DRM_ERROR("null texture image!\n");
		return -EINVAL;
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex->image,
			       sizeof(image)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);

	return ret;
}

static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t *stipple = data;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indirect_t *indirect = data;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
		  indirect->idx, indirect->start, indirect->end,
		  indirect->discard);

	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	buf = dma->buflist[indirect->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
		return -EINVAL;
	}

	if (indirect->start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect->start, buf->used);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect->end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
	if (indirect->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex2_t *vertex = data;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return -EINVAL;

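	/* Each primitive carries a state index; the full state block is
	 * only re-emitted when the index differs from the previous
	 * primitive's (laststate starts at 0xff so the first primitive
	 * always emits state).
	 */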
	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
			return -EFAULT;

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex->state[prim.stateidx],
					       sizeof(state)))
				return -EFAULT;

			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return -EINVAL;
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return -EINVAL;

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

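	/* CP_PACKET0 with a count of sz - 1 writes sz consecutive
	 * registers starting at reg, so the packet header plus the
	 * client-supplied table is sz + 1 ring dwords in total.
	 */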
	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

/* God this is ugly
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

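	/* Identical to radeon_emit_scalars() except for the 0x100 bias:
	 * the offset field in the command header is only 8 bits wide, so
	 * this variant appears to exist to reach the scalar registers
	 * above 0xff.
	 */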
	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

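	/* sz counts dwords (count is in 4-dword vectors) while bufsz is
	 * in bytes, hence the sz * 4 comparison below.
	 */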
	if (!sz)
		return 0;
	if (sz * 4 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect box;
	unsigned int cmdsz;
	int ret;
	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return -EFAULT;
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_wait(struct drm_device * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%x\n", flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t *cmdbuf = data;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
		return -EINVAL;
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf->bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return -ENOMEM;
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
				       cmdbuf->bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return -EFAULT;
		}
		cmdbuf->buf = kbuf;
	}

	orig_nbox = cmdbuf->nbox;

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
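	/* The command stream is a sequence of 4-byte headers, each followed
	 * by its payload; every handler below consumes its own payload by
	 * advancing cmdbuf->buf and shrinking cmdbuf->bufsz.
	 */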
	while (cmdbuf->bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf->buf;
		cmdbuf->buf += sizeof(header);
		cmdbuf->bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, file_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, file_priv->master, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, file_priv, cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf->buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return -EINVAL;
}

static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t *param = data;
	int value;

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param->param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(2);
		break;
	case RADEON_PARAM_IRQ_NR:
		value = drm_dev_to_irq(dev);
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio->offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
		 * pointer which can't fit into an int-sized variable.  According to
		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
		 * not supporting it shouldn't be a problem.  If the same functionality
		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
		 * so backwards-compatibility for the embedded platforms can be
		 * maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		/* no users of this parameter */
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	case RADEON_PARAM_SCRATCH_OFFSET:
		if (!dev_priv->writeback_works)
			return -EINVAL;
		value = RADEON_SCRATCH_REG_OFFSET;
		break;
	case RADEON_PARAM_CARD_TYPE:
		if (dev_priv->flags & RADEON_IS_PCIE)
			value = RADEON_CARD_PCIE;
		else if (dev_priv->flags & RADEON_IS_AGP)
			value = RADEON_CARD_AGP;
		else
			value = RADEON_CARD_PCI;
		break;
	case RADEON_PARAM_VBLANK_CRTC:
		value = radeon_vblank_crtc_get(dev);
		break;
	case RADEON_PARAM_FB_LOCATION:
		value = radeon_read_fb_location(dev_priv);
		break;
	case RADEON_PARAM_NUM_GB_PIPES:
		value = dev_priv->num_gb_pipes;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", param->param);
		return -EINVAL;
	}

	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return -EFAULT;
	}

	return 0;
}

static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_setparam_t *sp = data;
	struct drm_radeon_driver_file_fields *radeon_priv;

	switch (sp->param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = file_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
		    sp->value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp->value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp->value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp->value;
		dev_priv->pcigart_offset_set = 1;
		break;
	case RADEON_SETPARAM_NEW_MEMMAP:
		dev_priv->new_memmap = sp->value;
		break;
	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
		dev_priv->gart_info.table_size = sp->value;
		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
		break;
	case RADEON_SETPARAM_VBLANK_CRTC:
		return radeon_vblank_crtc_set(dev, sp->value);
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp->param);
		return -EINVAL;
	}

	return 0;
}

/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any allocated GART memory.
 *    - Free any allocated radeon surfaces.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		dev_priv->page_flipping = 0;
		radeon_mem_release(file_priv, dev_priv->gart_heap);
		radeon_mem_release(file_priv, dev_priv->fb_heap);
		radeon_surfaces_release(file_priv, dev_priv);
	}
}

void radeon_driver_lastclose(struct drm_device *dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_DEBUG("\n");
	radeon_priv =
	    (struct drm_radeon_driver_file_fields *)
	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);

	if (!radeon_priv)
		return -ENOMEM;

	file_priv->driver_priv = radeon_priv;

	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    file_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}

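/* The ioctl table: each entry binds an ioctl number to its handler along
 * with the required permissions (DRM_AUTH for authenticated clients,
 * DRM_MASTER/DRM_ROOT_ONLY for the privileged configuration paths).
 */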
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);