/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "radeon_fog.h"
#include "util/simple_list.h"
#include "util/u_memory.h"

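/* COPY_DWORDS(dst, src, nr) copies 'nr' 32-bit words from 'src' to 'dst'
 * and leaves 'dst' advanced past the copied data.  On x86 this is a
 * single "rep ; movsl" string operation; the generic fallback is a
 * plain copy loop.
 */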
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif

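/* The radeonEmitVec{4,8,12,16} helpers gather a 1-4 dword vertex
 * attribute from a strided source array into a tightly packed output
 * buffer.  When the source stride matches the element size the data is
 * already packed and a straight block copy is used.
 */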
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__func__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

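/* Upload a vertex attribute array into a newly allocated DMA region and
 * describe its layout in the radeon_aos (array-of-structures) record.
 * 'size' is in dwords per element; a stride of 0 uploads a single
 * constant element.
 */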
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	radeon_bo_map(aos->bo, 1);
	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
	radeon_bo_unmap(aos->bo);
}

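/* Variant of rcommon_emit_vector() for the one-component fog coordinate:
 * instead of copying raw values, each source coordinate is converted to
 * a fog blend factor as it is written into the DMA buffer.
 */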
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
			 GLvoid *data, int stride, int count)
{
	int i;
	float *out;
	int size = 1;
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d\n",
			__func__, count, stride);

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	/* Emit the data */
	radeon_bo_map(aos->bo, 1);
	out = (float*)((char*)aos->bo->ptr + aos->offset);
	for (i = 0; i < count; i++) {
		out[0] = radeonComputeFogBlendFactor(ctx, *(GLfloat *)data);
		out++;
		data += stride;
	}
	radeon_bo_unmap(aos->bo);
}

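/* Set up the DMA state: three empty buffer lists (free, wait, reserved)
 * and the default minimum allocation size.
 */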
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

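/* Make sure the current DMA region (the head of the reserved list) can
 * hold at least 'size' bytes: recycle a large-enough buffer object from
 * the free list if possible, otherwise open a new one, then map it.
 */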
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* Raise the minimum allocation size to at least the requested size,
	   rounded up to the next 16-byte boundary. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);
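	/* e.g. a request of 100 bytes yields (100 + 15) & ~15 = 112, the
	 * smallest multiple of 16 that is >= 100.
	 */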

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zi\n",
			__func__, size, rmesa->dma.minimum_size);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			/* Allocation failed: flush pending commands to release
			   buffer references, then retry. */
			rcommonFlushCmdBuf(rmesa, __func__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* Buffers are pushed and popped at the tail of the free list,
		   so unused buffers accumulate at the head, where they can
		   later be freed. */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed in radeon_revalidate_bos,
		   emptying the reserved list; allocate again. */
		goto again_alloc;
	}
	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __func__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(&rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
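	/* The two statements above round current_used up to the requested
	 * power-of-two alignment: e.g. for alignment = 32 the mask is 31,
	 * so current_used = 70 becomes (70 + 31) & ~31 = 96.
	 */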

	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

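/* Unreference and free every buffer object on the free, wait and
 * reserved lists.
 */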
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __func__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}
}

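/* Hand back the final 'return_bytes' bytes of the current DMA region so
 * they can be reallocated, rewinding the used and vertex counters.
 */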
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __func__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

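/* Query whether the GPU has finished with a buffer object.  Kernels
 * without the busy query return -EINVAL, in which case the buffer is
 * assumed idle and a one-time warning is printed.
 */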
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

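/* Age the DMA buffer lists: reserved buffers are unmapped and moved to
 * the wait list, idle wait-list buffers move to the free list for reuse,
 * and buffers that have expired or are smaller than the current minimum
 * request size are freed.
 */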
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__func__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	/* Move idle buffers from the wait list to the free list; the wait
	   list gives the GPU time to finish with the data before reuse. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			free(dma_bo);
			continue;
		}
		/* Free objects that have become too small to satisfy the
		   current minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			free(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo)) {
			break;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* Move reserved buffers to the wait list. */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		radeon_bo_unmap(dma_bo->bo);
		/* Free objects that have become too small to satisfy the
		   current minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			free(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* Free buffers that have sat unused on the free list for too long. */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		free(dma_bo);
	}
}


/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim(struct gl_context *ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);
	dma->flush = NULL;

	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}

/* Allocate space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize)
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(&rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* No flush callback is installed (e.g. a command buffer flush
		   reset it); request a flush of stored vertices and reinstall
		   the callback. */
		rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	assert(vsize == rmesa->swtcl.vertex_size * 4);
	assert(rmesa->dma.flush == rcommon_flush_last_swtcl_prim);
	assert(rmesa->dma.current_used +
	       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
	       rmesa->dma.current_vertexptr);

	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

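/* Flush any pending software-TCL vertices and drop this context's
 * references to the per-array AOS buffer objects.
 */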
void radeonReleaseArrays(struct gl_context *ctx, GLuint newinputs)
{
	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
	int i;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);

	if (radeon->dma.flush) {
		radeon->dma.flush(&radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}