1 /**************************************************************************
2
3 Copyright (C) 2004 Nicolai Haehnle.
4 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
5
6 The Weather Channel (TM) funded Tungsten Graphics to develop the
7 initial release of the Radeon 8500 driver under the XFree86 license.
8 This notice must be preserved.
9
10 All Rights Reserved.
11
12 Permission is hereby granted, free of charge, to any person obtaining a
13 copy of this software and associated documentation files (the "Software"),
14 to deal in the Software without restriction, including without limitation
15 on the rights to use, copy, modify, merge, publish, distribute, sub
16 license, and/or sell copies of the Software, and to permit persons to whom
17 the Software is furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice (including the next
20 paragraph) shall be included in all copies or substantial portions of the
21 Software.
22
23 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 USE OR OTHER DEALINGS IN THE SOFTWARE.
30
31 **************************************************************************/
32
33 #include <errno.h>
34 #include "radeon_common.h"
35 #include "radeon_fog.h"
36 #include "util/simple_list.h"
37 #include "util/u_memory.h"
38
/*
 * COPY_DWORDS(dst, src, nr)
 *
 * Copy nr 32-bit words from src to dst and advance dst past the words
 * that were written.  dst must therefore be a modifiable pointer lvalue.
 *
 * The x86 variant uses "rep movsl"; the portable variant is a plain loop.
 * In the portable variant every macro argument is parenthesized and nr is
 * evaluated exactly once, so expressions such as "cond ? 2 : 3" or
 * "count * 2" can be passed safely.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	/* Long local names so they cannot capture a caller variable. */ \
	const int copy_dwords_nr_ = (nr);				\
	int copy_dwords_i_;						\
	for ( copy_dwords_i_ = 0 ;					\
	      copy_dwords_i_ < copy_dwords_nr_ ;			\
	      copy_dwords_i_++ )					\
		(dst)[copy_dwords_i_] =					\
			((const int *)(src))[copy_dwords_i_];		\
	(dst) += copy_dwords_nr_;					\
} while (0)
#endif
58
radeonEmitVec4(uint32_t * out,const GLvoid * data,int stride,int count)59 void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
60 {
61 int i;
62
63 if (RADEON_DEBUG & RADEON_VERTS)
64 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
65 __func__, count, stride, (void *)out, (void *)data);
66
67 if (stride == 4)
68 COPY_DWORDS(out, data, count);
69 else
70 for (i = 0; i < count; i++) {
71 out[0] = *(int *)data;
72 out++;
73 data += stride;
74 }
75 }
76
radeonEmitVec8(uint32_t * out,const GLvoid * data,int stride,int count)77 void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
78 {
79 int i;
80
81 if (RADEON_DEBUG & RADEON_VERTS)
82 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
83 __func__, count, stride, (void *)out, (void *)data);
84
85 if (stride == 8)
86 COPY_DWORDS(out, data, count * 2);
87 else
88 for (i = 0; i < count; i++) {
89 out[0] = *(int *)data;
90 out[1] = *(int *)(data + 4);
91 out += 2;
92 data += stride;
93 }
94 }
95
radeonEmitVec12(uint32_t * out,const GLvoid * data,int stride,int count)96 void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
97 {
98 int i;
99
100 if (RADEON_DEBUG & RADEON_VERTS)
101 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
102 __func__, count, stride, (void *)out, (void *)data);
103
104 if (stride == 12) {
105 COPY_DWORDS(out, data, count * 3);
106 }
107 else
108 for (i = 0; i < count; i++) {
109 out[0] = *(int *)data;
110 out[1] = *(int *)(data + 4);
111 out[2] = *(int *)(data + 8);
112 out += 3;
113 data += stride;
114 }
115 }
116
radeonEmitVec16(uint32_t * out,const GLvoid * data,int stride,int count)117 void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
118 {
119 int i;
120
121 if (RADEON_DEBUG & RADEON_VERTS)
122 fprintf(stderr, "%s count %d stride %d out %p data %p\n",
123 __func__, count, stride, (void *)out, (void *)data);
124
125 if (stride == 16)
126 COPY_DWORDS(out, data, count * 4);
127 else
128 for (i = 0; i < count; i++) {
129 out[0] = *(int *)data;
130 out[1] = *(int *)(data + 4);
131 out[2] = *(int *)(data + 8);
132 out[3] = *(int *)(data + 12);
133 out += 4;
134 data += stride;
135 }
136 }
137
rcommon_emit_vector(struct gl_context * ctx,struct radeon_aos * aos,const GLvoid * data,int size,int stride,int count)138 void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
139 const GLvoid * data, int size, int stride, int count)
140 {
141 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
142 uint32_t *out;
143
144 if (stride == 0) {
145 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
146 count = 1;
147 aos->stride = 0;
148 } else {
149 radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
150 aos->stride = size;
151 }
152
153 aos->components = size;
154 aos->count = count;
155
156 radeon_bo_map(aos->bo, 1);
157 out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
158 switch (size) {
159 case 1: radeonEmitVec4(out, data, stride, count); break;
160 case 2: radeonEmitVec8(out, data, stride, count); break;
161 case 3: radeonEmitVec12(out, data, stride, count); break;
162 case 4: radeonEmitVec16(out, data, stride, count); break;
163 default:
164 assert(0);
165 break;
166 }
167 radeon_bo_unmap(aos->bo);
168 }
169
/*
 * Allocate a DMA region for the fog attribute and fill it with blend
 * factors.
 *
 * Each source float is passed through radeonComputeFogBlendFactor() and
 * the result is stored as one float per element.
 *
 *   aos    - receives the buffer object, offset, stride, component count
 *            and element count describing the uploaded array
 *   data   - source floats
 *   stride - source stride in bytes; 0 means a single element is uploaded
 *   count  - number of elements (forced to 1 when stride is 0)
 */
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
			 GLvoid *data, int stride, int count)
{
	int i;
	float *out;
	/* Walk the source as bytes: arithmetic on void * is not ISO C. */
	const char *src = (const char *)data;
	int size = 1;
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d\n",
			__func__, count, stride);

	if (stride == 0) {
		radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 );
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	/* Emit the data */
	radeon_bo_map(aos->bo, 1);
	out = (float*)((char*)aos->bo->ptr + aos->offset);
	for (i = 0; i < count; i++) {
		out[0] = radeonComputeFogBlendFactor( ctx, *(const GLfloat *)src );
		out++;
		src += stride;
	}
	radeon_bo_unmap(aos->bo);
}
204
/*
 * Reset the context's DMA bookkeeping: empty the free, wait and reserved
 * buffer lists and set the minimum buffer size to MAX_DMA_BUF_SZ.
 */
void radeon_init_dma(radeonContextPtr rmesa)
{
	struct radeon_dma *dma = &rmesa->dma;

	make_empty_list(&dma->free);
	make_empty_list(&dma->wait);
	make_empty_list(&dma->reserved);

	dma->minimum_size = MAX_DMA_BUF_SZ;
}
212
/*
 * Make a fresh DMA buffer the current one (head of dma.reserved) and map it.
 *
 *   size - number of bytes the caller is about to allocate.
 *
 * dma.minimum_size is raised to size rounded up to 16 bytes when size
 * exceeds it.  The buffer is taken from the tail of dma.free when that
 * buffer holds at least size bytes; otherwise a new buffer of minimum_size
 * bytes is opened in the GTT domain.  current_used and current_vertexptr
 * are reset to 0.
 *
 * NOTE(review): opening a buffer is retried without bound (flushing the
 * command buffer between attempts), and a failed CALLOC_STRUCT is only
 * caught by assert().  Both are inherited behaviour; the function has no
 * way to report failure to its callers.
 */
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo;

	/* we set minimum sizes to at least requested size
	   aligned to next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
		     __func__, size, rmesa->dma.minimum_size);

	do {
		if (is_empty_list(&rmesa->dma.free)
		    || last_elem(&rmesa->dma.free)->bo->size < size) {
			dma_bo = CALLOC_STRUCT(radeon_dma_bo);
			assert(dma_bo);

			/* dma_bo is not on any list yet, so retrying the open
			   after a flush is safe here. */
			for (;;) {
				dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
							    0, rmesa->dma.minimum_size, 4,
							    RADEON_GEM_DOMAIN_GTT, 0);
				if (dma_bo->bo)
					break;
				rcommonFlushCmdBuf(rmesa, __func__);
			}
			insert_at_head(&rmesa->dma.reserved, dma_bo);
		} else {
			/* We push and pop buffers from end of list so we can keep
			   counter on unused buffers for later freeing them from
			   begin of list */
			dma_bo = last_elem(&rmesa->dma.free);
			remove_from_list(dma_bo);
			insert_at_head(&rmesa->dma.reserved, dma_bo);
		}

		rmesa->dma.current_used = 0;
		rmesa->dma.current_vertexptr = 0;

		if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
						  first_elem(&rmesa->dma.reserved)->bo,
						  RADEON_GEM_DOMAIN_GTT, 0))
			fprintf(stderr,"failure to revalidate BOs - badness\n");

		/*
		 * If the reserved list is empty here, the command buffer was
		 * flushed during the space check and the reserved buffers were
		 * released.  The buffer picked above has then been freed or
		 * moved to the wait list (see radeonReleaseDmaRegions), so it
		 * must not be touched again: start the selection over instead
		 * of reusing the stale pointer.
		 */
	} while (is_empty_list(&rmesa->dma.reserved));

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
262
263 /* Allocates a region from rmesa->dma.current. If there isn't enough
264 * space in current, grab a new buffer (and discard what was left of current)
265 */
radeonAllocDmaRegion(radeonContextPtr rmesa,struct radeon_bo ** pbo,int * poffset,int bytes,int alignment)266 void radeonAllocDmaRegion(radeonContextPtr rmesa,
267 struct radeon_bo **pbo, int *poffset,
268 int bytes, int alignment)
269 {
270 if (RADEON_DEBUG & RADEON_IOCTL)
271 fprintf(stderr, "%s %d\n", __func__, bytes);
272
273 if (rmesa->dma.flush)
274 rmesa->dma.flush(&rmesa->glCtx);
275
276 assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
277
278 alignment--;
279 rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
280
281 if (is_empty_list(&rmesa->dma.reserved)
282 || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
283 radeonRefillCurrentDmaRegion(rmesa, bytes);
284
285 *poffset = rmesa->dma.current_used;
286 *pbo = first_elem(&rmesa->dma.reserved)->bo;
287 radeon_bo_ref(*pbo);
288
289 /* Always align to at least 16 bytes */
290 rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
291 rmesa->dma.current_vertexptr = rmesa->dma.current_used;
292
293 assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
294 }
295
/*
 * Release every DMA buffer the context tracks: each entry on the free,
 * wait and reserved lists (in that order) is unlinked, its buffer object
 * is unreferenced and the list entry itself is freed.
 */
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *const heads[] = {
		&rmesa->dma.free,
		&rmesa->dma.wait,
		&rmesa->dma.reserved,
	};
	struct radeon_dma_bo *entry;
	struct radeon_dma_bo *next;
	unsigned which;

	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __func__);

	for (which = 0; which < sizeof(heads) / sizeof(heads[0]); which++) {
		struct radeon_dma_bo *const head = heads[which];

		foreach_s(entry, next, head) {
			remove_from_list(entry);
			radeon_bo_unref(entry->bo);
			free(entry);
		}
	}
}
321
/*
 * Give back the last return_bytes bytes of the current DMA buffer by
 * moving current_used (and current_vertexptr with it) backwards.
 * Does nothing when there is no reserved buffer.
 */
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (!is_empty_list(&rmesa->dma.reserved)) {
		if (RADEON_DEBUG & RADEON_IOCTL)
			fprintf(stderr, "%s %d\n", __func__, return_bytes);

		rmesa->dma.current_used -= return_bytes;
		rmesa->dma.current_vertexptr = rmesa->dma.current_used;
	}
}
332
/*
 * Return 0 when radeon_bo_is_busy() reports -EBUSY for bo, and 1 for every
 * other result.  A result of -EINVAL additionally emits a one-time warning
 * that the busy query is unsupported; the buffer is still reported as idle.
 */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t busy_domain;
	const int status = radeon_bo_is_busy(bo, &busy_domain);

	if (status == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			  "This may cause small performance drop for you.\n");
	}

	return (status == -EBUSY) ? 0 : 1;
}
343
/*
 * Age the DMA buffer lists by one step.
 *
 * The expire counter stored in the free list head is incremented, then:
 *   1. wait list: entries whose expire counter equals the new counter value
 *      are dropped with a warning, entries smaller than minimum_size are
 *      dropped, and idle entries move to the tail of the free list.  The
 *      scan stops at the first entry that is still busy.
 *   2. reserved list: every entry is unmapped, then either dropped (smaller
 *      than minimum_size) or moved to the tail of the wait list.
 *   3. free list: leading entries whose expire counter equals the new
 *      counter value are dropped.
 * Entries that change lists get their expire counter set to the new
 * counter value plus DMA_BO_FREE_TIME.
 */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma *dma = &rmesa->dma;
	struct radeon_dma_bo *entry;
	struct radeon_dma_bo *next;
	const int expire_at = ++dma->free.expire_counter + DMA_BO_FREE_TIME;
	const int now = dma->free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t n_free = 0;
		size_t n_wait = 0;
		size_t n_reserved = 0;

		foreach(entry, &dma->free)
			++n_free;

		foreach(entry, &dma->wait)
			++n_wait;

		foreach(entry, &dma->reserved)
			++n_reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
			__func__, n_free, n_wait, n_reserved, dma->minimum_size);
	}

	/* Wait list: gives the GPU time to consume a buffer before reuse. */
	foreach_s(entry, next, &dma->wait) {
		const int expired = (entry->expire_counter == now);

		if (expired) {
			WARN_ONCE("Leaking dma buffer object!\n");
		}
		/* Expired entries, and entries a larger request has made too
		   small to be useful, are destroyed. */
		if (expired || entry->bo->size < dma->minimum_size) {
			radeon_bo_unref(entry->bo);
			remove_from_list(entry);
			free(entry);
			continue;
		}
		if (!radeon_bo_is_idle(entry->bo)) {
			break;
		}
		remove_from_list(entry);
		entry->expire_counter = expire_at;
		insert_at_tail(&dma->free, entry);
	}

	/* Reserved list: everything leaves it, to the wait list or for good. */
	foreach_s(entry, next, &dma->reserved) {
		radeon_bo_unmap(entry->bo);
		if (entry->bo->size < dma->minimum_size) {
			/* Too small to be used again after a large request. */
			radeon_bo_unref(entry->bo);
			remove_from_list(entry);
			free(entry);
		} else {
			remove_from_list(entry);
			entry->expire_counter = expire_at;
			insert_at_tail(&dma->wait, entry);
		}
	}

	/* Free list: drop buffers that have sat unused until they expired. */
	foreach_s(entry, next, &dma->free) {
		if (entry->expire_counter != now)
			break;
		remove_from_list(entry);
		radeon_bo_unref(entry->bo);
		free(entry);
	}
}
418
419
420 /* Flush vertices in the current dma region.
421 */
rcommon_flush_last_swtcl_prim(struct gl_context * ctx)422 void rcommon_flush_last_swtcl_prim( struct gl_context *ctx )
423 {
424 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
425 struct radeon_dma *dma = &rmesa->dma;
426
427 if (RADEON_DEBUG & RADEON_IOCTL)
428 fprintf(stderr, "%s\n", __func__);
429 dma->flush = NULL;
430
431 radeon_bo_unmap(rmesa->swtcl.bo);
432
433 if (!is_empty_list(&dma->reserved)) {
434 GLuint current_offset = dma->current_used;
435
436 assert (dma->current_used +
437 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
438 dma->current_vertexptr);
439
440 if (dma->current_used != dma->current_vertexptr) {
441 dma->current_used = dma->current_vertexptr;
442
443 rmesa->vtbl.swtcl_flush(ctx, current_offset);
444 }
445 rmesa->swtcl.numverts = 0;
446 }
447 radeon_bo_unref(rmesa->swtcl.bo);
448 rmesa->swtcl.bo = NULL;
449 }
450 /* Alloc space in the current dma region.
451 */
452 void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa,int nverts,int vsize)453 rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
454 {
455 GLuint bytes = vsize * nverts;
456 void *head;
457 if (RADEON_DEBUG & RADEON_IOCTL)
458 fprintf(stderr, "%s\n", __func__);
459
460 if(is_empty_list(&rmesa->dma.reserved)
461 ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
462 if (rmesa->dma.flush) {
463 rmesa->dma.flush(&rmesa->glCtx);
464 }
465
466 radeonRefillCurrentDmaRegion(rmesa, bytes);
467
468 return NULL;
469 }
470
471 if (!rmesa->dma.flush) {
472 /* if cmdbuf flushed DMA restart */
473 rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
474 rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
475 }
476
477 assert( vsize == rmesa->swtcl.vertex_size * 4 );
478 assert( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
479 assert( rmesa->dma.current_used +
480 rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
481 rmesa->dma.current_vertexptr );
482
483 if (!rmesa->swtcl.bo) {
484 rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
485 radeon_bo_ref(rmesa->swtcl.bo);
486 radeon_bo_map(rmesa->swtcl.bo, 1);
487 }
488
489 head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
490 rmesa->dma.current_vertexptr += bytes;
491 rmesa->swtcl.numverts += nverts;
492 return head;
493 }
494
/*
 * Drop the buffer object references held by the TCL attribute arrays.
 *
 * Any pending dma.flush callback is run first; then every tcl.aos slot
 * below tcl.aos_count that holds a buffer object is unreferenced and
 * cleared.  newinputs is not used.
 */
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int slot;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __func__);

	if (radeon->dma.flush)
		radeon->dma.flush(&radeon->glCtx);

	for (slot = 0; slot < radeon->tcl.aos_count; slot++) {
		if (!radeon->tcl.aos[slot].bo)
			continue;

		radeon_bo_unref(radeon->tcl.aos[slot].bo);
		radeon->tcl.aos[slot].bo = NULL;
	}
}
513