1 /*
2 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3 * Copyright 2010 Marek Olšák <maraeo@gmail.com>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "compiler/nir/nir.h"
25 #include "util/format/u_format.h"
26 #include "util/format/u_format_s3tc.h"
27 #include "util/u_screen.h"
28 #include "util/u_memory.h"
29 #include "util/hex.h"
30 #include "util/os_time.h"
31 #include "vl/vl_decoder.h"
32 #include "vl/vl_video_buffer.h"
33
34 #include "r300_context.h"
35 #include "r300_texture.h"
36 #include "r300_screen_buffer.h"
37 #include "r300_state_inlines.h"
38 #include "r300_public.h"
39 #include "compiler/r300_nir.h"
40
41 #include "draw/draw_context.h"
42
43 /* Return the identifier behind whom the brave coders responsible for this
44 * amalgamation of code, sweat, and duct tape, routinely obscure their names.
45 *
46 * ...I should have just put "Corbin Simpson", but I'm not that cool.
47 *
48 * (Or egotistical. Yet.) */
/* Screen hook reporting who ships this driver; the screen itself is not
 * consulted. */
static const char *r300_get_vendor(struct pipe_screen *pscreen)
{
    static const char vendor[] = "Mesa";

    (void)pscreen;
    return vendor;
}
53
/* Screen hook reporting the hardware vendor string; the screen itself is not
 * consulted. */
static const char *r300_get_device_vendor(struct pipe_screen *pscreen)
{
    static const char device_vendor[] = "ATI";

    (void)pscreen;
    return device_vendor;
}
58
59 static const char* chip_families[] = {
60 "unknown",
61 "ATI R300",
62 "ATI R350",
63 "ATI RV350",
64 "ATI RV370",
65 "ATI RV380",
66 "ATI RS400",
67 "ATI RC410",
68 "ATI RS480",
69 "ATI R420",
70 "ATI R423",
71 "ATI R430",
72 "ATI R480",
73 "ATI R481",
74 "ATI RV410",
75 "ATI RS600",
76 "ATI RS690",
77 "ATI RS740",
78 "ATI RV515",
79 "ATI R520",
80 "ATI RV530",
81 "ATI R580",
82 "ATI RV560",
83 "ATI RV570"
84 };
85
r300_get_family_name(struct r300_screen * r300screen)86 static const char* r300_get_family_name(struct r300_screen* r300screen)
87 {
88 return chip_families[r300screen->caps.family];
89 }
90
/* Screen hook returning the device name, which for this driver is the chip
 * family name of the underlying r300 screen. */
static const char *r300_get_name(struct pipe_screen *pscreen)
{
    return r300_get_family_name(r300_screen(pscreen));
}
97
r300_disk_cache_create(struct r300_screen * r300screen)98 static void r300_disk_cache_create(struct r300_screen* r300screen)
99 {
100 struct mesa_sha1 ctx;
101 unsigned char sha1[20];
102 char cache_id[20 * 2 + 1];
103
104 _mesa_sha1_init(&ctx);
105 if (!disk_cache_get_function_identifier(r300_disk_cache_create,
106 &ctx))
107 return;
108
109 _mesa_sha1_final(&ctx, sha1);
110 mesa_bytes_to_hex(cache_id, sha1, 20);
111
112 r300screen->disk_shader_cache =
113 disk_cache_create(r300_get_family_name(r300screen),
114 cache_id,
115 r300screen->debug);
116 }
117
r300_get_disk_shader_cache(struct pipe_screen * pscreen)118 static struct disk_cache* r300_get_disk_shader_cache(struct pipe_screen* pscreen)
119 {
120 struct r300_screen* r300screen = r300_screen(pscreen);
121 return r300screen->disk_shader_cache;
122 }
123
r300_get_param(struct pipe_screen * pscreen,enum pipe_cap param)124 static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
125 {
126 struct r300_screen* r300screen = r300_screen(pscreen);
127 bool is_r500 = r300screen->caps.is_r500;
128
129 switch (param) {
130 /* Supported features (boolean caps). */
131 case PIPE_CAP_NPOT_TEXTURES:
132 case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
133 case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
134 case PIPE_CAP_ANISOTROPIC_FILTER:
135 case PIPE_CAP_OCCLUSION_QUERY:
136 case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
137 case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
138 case PIPE_CAP_BLEND_EQUATION_SEPARATE:
139 case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
140 case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
141 case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
142 case PIPE_CAP_CONDITIONAL_RENDER:
143 case PIPE_CAP_TEXTURE_BARRIER:
144 case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
145 case PIPE_CAP_CLIP_HALFZ:
146 case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
147 case PIPE_CAP_LEGACY_MATH_RULES:
148 case PIPE_CAP_TGSI_TEXCOORD:
149 return 1;
150
151 case PIPE_CAP_TEXTURE_TRANSFER_MODES:
152 return PIPE_TEXTURE_TRANSFER_BLIT;
153
154 case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
155 return R300_BUFFER_ALIGNMENT;
156
157 case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
158 return 16;
159
160 case PIPE_CAP_GLSL_FEATURE_LEVEL:
161 case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
162 return 120;
163
164 /* r300 cannot do swizzling of compressed textures. Supported otherwise. */
165 case PIPE_CAP_TEXTURE_SWIZZLE:
166 return r300screen->caps.dxtc_swizzle;
167
168 /* We don't support color clamping on r500, so that we can use color
169 * interpolators for generic varyings. */
170 case PIPE_CAP_VERTEX_COLOR_CLAMPED:
171 return !is_r500;
172
173 /* Supported on r500 only. */
174 case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
175 case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
176 case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
177 case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
178 return is_r500 ? 1 : 0;
179
180 case PIPE_CAP_SHAREABLE_SHADERS:
181 return 0;
182
183 case PIPE_CAP_MAX_GS_INVOCATIONS:
184 return 32;
185 case PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT:
186 return 1 << 27;
187
188 /* SWTCL-only features. */
189 case PIPE_CAP_PRIMITIVE_RESTART:
190 case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
191 case PIPE_CAP_USER_VERTEX_BUFFERS:
192 case PIPE_CAP_VS_WINDOW_SPACE_POSITION:
193 return !r300screen->caps.has_tcl;
194
195 /* HWTCL-only features / limitations. */
196 case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
197 case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
198 case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
199 return r300screen->caps.has_tcl;
200
201 /* Texturing. */
202 case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
203 return is_r500 ? 4096 : 2048;
204 case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
205 case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
206 /* 13 == 4096, 12 == 2048 */
207 return is_r500 ? 13 : 12;
208
209 /* Render targets. */
210 case PIPE_CAP_MAX_RENDER_TARGETS:
211 return 4;
212 case PIPE_CAP_ENDIANNESS:
213 return PIPE_ENDIAN_LITTLE;
214
215 case PIPE_CAP_MAX_VIEWPORTS:
216 return 1;
217
218 case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
219 return 2048;
220
221 case PIPE_CAP_MAX_VARYINGS:
222 return 10;
223
224 case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:
225 return 0;
226
227 case PIPE_CAP_VENDOR_ID:
228 return 0x1002;
229 case PIPE_CAP_DEVICE_ID:
230 return r300screen->info.pci_id;
231 case PIPE_CAP_ACCELERATED:
232 return 1;
233 case PIPE_CAP_VIDEO_MEMORY:
234 return r300screen->info.vram_size_kb >> 10;
235 case PIPE_CAP_UMA:
236 return 0;
237 case PIPE_CAP_PCI_GROUP:
238 return r300screen->info.pci.domain;
239 case PIPE_CAP_PCI_BUS:
240 return r300screen->info.pci.bus;
241 case PIPE_CAP_PCI_DEVICE:
242 return r300screen->info.pci.dev;
243 case PIPE_CAP_PCI_FUNCTION:
244 return r300screen->info.pci.func;
245 default:
246 return u_pipe_screen_get_param_defaults(pscreen, param);
247 }
248 }
249
r300_get_shader_param(struct pipe_screen * pscreen,enum pipe_shader_type shader,enum pipe_shader_cap param)250 static int r300_get_shader_param(struct pipe_screen *pscreen,
251 enum pipe_shader_type shader,
252 enum pipe_shader_cap param)
253 {
254 struct r300_screen* r300screen = r300_screen(pscreen);
255 bool is_r400 = r300screen->caps.is_r400;
256 bool is_r500 = r300screen->caps.is_r500;
257
258 switch (param) {
259 case PIPE_SHADER_CAP_SUPPORTED_IRS:
260 return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
261 default:
262 break;
263 }
264
265 switch (shader) {
266 case PIPE_SHADER_FRAGMENT:
267 switch (param)
268 {
269 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
270 return is_r500 || is_r400 ? 512 : 96;
271 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
272 return is_r500 || is_r400 ? 512 : 64;
273 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
274 return is_r500 || is_r400 ? 512 : 32;
275 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
276 return is_r500 ? 511 : 4;
277 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
278 return is_r500 ? 64 : 0; /* Actually unlimited on r500. */
279 /* Fragment shader limits. */
280 case PIPE_SHADER_CAP_MAX_INPUTS:
281 /* 2 colors + 8 texcoords are always supported
282 * (minus fog and wpos).
283 *
284 * R500 has the ability to turn 3rd and 4th color into
285 * additional texcoords but there is no two-sided color
286 * selection then. However the facing bit can be used instead. */
287 return 10;
288 case PIPE_SHADER_CAP_MAX_OUTPUTS:
289 return 4;
290 case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
291 return (is_r500 ? 256 : 32) * sizeof(float[4]);
292 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
293 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
294 return 1;
295 case PIPE_SHADER_CAP_MAX_TEMPS:
296 return is_r500 ? 128 : is_r400 ? 64 : 32;
297 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
298 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
299 return r300screen->caps.num_tex_units;
300 case PIPE_SHADER_CAP_CONT_SUPPORTED:
301 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
302 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
303 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
304 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
305 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
306 case PIPE_SHADER_CAP_SUBROUTINES:
307 case PIPE_SHADER_CAP_INTEGERS:
308 case PIPE_SHADER_CAP_INT64_ATOMICS:
309 case PIPE_SHADER_CAP_FP16:
310 case PIPE_SHADER_CAP_FP16_DERIVATIVES:
311 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
312 case PIPE_SHADER_CAP_INT16:
313 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
314 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
315 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
316 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
317 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
318 return 0;
319 default:
320 break;
321 }
322 break;
323 case PIPE_SHADER_VERTEX:
324 switch (param)
325 {
326 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
327 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
328 case PIPE_SHADER_CAP_SUBROUTINES:
329 return 0;
330 default:;
331 }
332
333 if (!r300screen->caps.has_tcl) {
334 switch (param) {
335 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
336 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
337 return 0;
338
339 /* mesa/st requires that this cap is the same across stages, and the FS
340 * can't do ints.
341 */
342 case PIPE_SHADER_CAP_INTEGERS:
343 return 0;
344
345 /* Even if gallivm NIR can do this, we call nir_to_tgsi manually and
346 * TGSI can't.
347 */
348 case PIPE_SHADER_CAP_INT16:
349 case PIPE_SHADER_CAP_FP16:
350 case PIPE_SHADER_CAP_FP16_DERIVATIVES:
351 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
352 return 0;
353
354 /* While draw could normally handle this for the VS, the NIR lowering
355 * to regs can't handle our non-native-integers, so we have to lower to
356 * if ladders.
357 */
358 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
359 return 0;
360 default:
361 return draw_get_shader_param(shader, param);
362 }
363 }
364
365 switch (param)
366 {
367 case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
368 case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
369 return is_r500 ? 1024 : 256;
370 case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
371 return is_r500 ? 4 : 0; /* For loops; not sure about conditionals. */
372 case PIPE_SHADER_CAP_MAX_INPUTS:
373 return 16;
374 case PIPE_SHADER_CAP_MAX_OUTPUTS:
375 return 10;
376 case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
377 return 256 * sizeof(float[4]);
378 case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
379 return 1;
380 case PIPE_SHADER_CAP_MAX_TEMPS:
381 return 32;
382 case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
383 case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
384 return 1;
385 case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
386 case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
387 case PIPE_SHADER_CAP_CONT_SUPPORTED:
388 case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
389 case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
390 case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
391 case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
392 case PIPE_SHADER_CAP_SUBROUTINES:
393 case PIPE_SHADER_CAP_INTEGERS:
394 case PIPE_SHADER_CAP_FP16:
395 case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
396 case PIPE_SHADER_CAP_FP16_DERIVATIVES:
397 case PIPE_SHADER_CAP_INT16:
398 case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
399 case PIPE_SHADER_CAP_INT64_ATOMICS:
400 case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
401 case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
402 case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
403 case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
404 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
405 case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
406 return 0;
407 default:
408 break;
409 }
410 break;
411 default:
412 ; /* nothing */
413 }
414 return 0;
415 }
416
r300_get_paramf(struct pipe_screen * pscreen,enum pipe_capf param)417 static float r300_get_paramf(struct pipe_screen* pscreen,
418 enum pipe_capf param)
419 {
420 struct r300_screen* r300screen = r300_screen(pscreen);
421
422 switch (param) {
423 case PIPE_CAPF_MIN_LINE_WIDTH:
424 case PIPE_CAPF_MIN_LINE_WIDTH_AA:
425 case PIPE_CAPF_MIN_POINT_SIZE:
426 case PIPE_CAPF_MIN_POINT_SIZE_AA:
427 return 1;
428 case PIPE_CAPF_POINT_SIZE_GRANULARITY:
429 case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
430 return 0.1;
431 case PIPE_CAPF_MAX_LINE_WIDTH:
432 case PIPE_CAPF_MAX_LINE_WIDTH_AA:
433 case PIPE_CAPF_MAX_POINT_SIZE:
434 case PIPE_CAPF_MAX_POINT_SIZE_AA:
435 /* The maximum dimensions of the colorbuffer are our practical
436 * rendering limits. 2048 pixels should be enough for anybody. */
437 if (r300screen->caps.is_r500) {
438 return 4096.0f;
439 } else if (r300screen->caps.is_r400) {
440 return 4021.0f;
441 } else {
442 return 2560.0f;
443 }
444 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
445 return 16.0f;
446 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
447 return 16.0f;
448 case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
449 case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
450 case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
451 return 0.0f;
452 default:
453 debug_printf("r300: Warning: Unknown CAP %d in get_paramf.\n",
454 param);
455 return 0.0f;
456 }
457 }
458
r300_get_video_param(struct pipe_screen * screen,enum pipe_video_profile profile,enum pipe_video_entrypoint entrypoint,enum pipe_video_cap param)459 static int r300_get_video_param(struct pipe_screen *screen,
460 enum pipe_video_profile profile,
461 enum pipe_video_entrypoint entrypoint,
462 enum pipe_video_cap param)
463 {
464 switch (param) {
465 case PIPE_VIDEO_CAP_SUPPORTED:
466 return vl_profile_supported(screen, profile, entrypoint);
467 case PIPE_VIDEO_CAP_NPOT_TEXTURES:
468 return 0;
469 case PIPE_VIDEO_CAP_MAX_WIDTH:
470 case PIPE_VIDEO_CAP_MAX_HEIGHT:
471 return vl_video_buffer_max_size(screen);
472 case PIPE_VIDEO_CAP_PREFERED_FORMAT:
473 return PIPE_FORMAT_NV12;
474 case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
475 return false;
476 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
477 return false;
478 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
479 return true;
480 case PIPE_VIDEO_CAP_MAX_LEVEL:
481 return vl_level_supported(screen, profile);
482 default:
483 return 0;
484 }
485 }
486
/* Designated initializers shared by every nir_shader_compiler_options table
 * in this file.  The first group holds the non-lowering flags, the second
 * the lower_* requests.  The list deliberately ends without a trailing
 * comma: users write "COMMON_NIR_OPTIONS," followed by their own fields. */
#define COMMON_NIR_OPTIONS                    \
    .fdot_replicates = true,                  \
    .fuse_ffma32 = true,                      \
    .fuse_ffma64 = true,                      \
    .no_integers = true,                      \
    .use_interpolated_input_intrinsics = true, \
    .lower_bitops = true,                     \
    .lower_extract_byte = true,               \
    .lower_extract_word = true,               \
    .lower_insert_byte = true,                \
    .lower_insert_word = true,                \
    .lower_fceil = true,                      \
    .lower_ffloor = true,                     \
    .lower_ftrunc = true,                     \
    .lower_fdiv = true,                       \
    .lower_fdph = true,                       \
    .lower_flrp32 = true,                     \
    .lower_flrp64 = true,                     \
    .lower_fmod = true,                       \
    .lower_fsign = true,                      \
    .lower_fsqrt = true,                      \
    .lower_uniforms_to_ubo = true,            \
    .lower_vector_cmp = true
510
511 static const nir_shader_compiler_options r500_vs_compiler_options = {
512 COMMON_NIR_OPTIONS,
513 .has_fused_comp_and_csel = true,
514
515 /* Have HW loops support and 1024 max instr count, but don't unroll *too*
516 * hard.
517 */
518 .max_unroll_iterations = 29,
519 };
520
521 static const nir_shader_compiler_options r500_fs_compiler_options = {
522 COMMON_NIR_OPTIONS,
523 .lower_fpow = true, /* POW is only in the VS */
524 .has_fused_comp_and_csel = true,
525
526 /* Have HW loops support and 512 max instr count, but don't unroll *too*
527 * hard.
528 */
529 .max_unroll_iterations = 32,
530 };
531
532 static const nir_shader_compiler_options r300_vs_compiler_options = {
533 COMMON_NIR_OPTIONS,
534 .lower_fsat = true, /* No fsat in pre-r500 VS */
535 .lower_sincos = true,
536
537 /* Note: has HW loops support, but only 256 ALU instructions. */
538 .max_unroll_iterations = 32,
539 };
540
541 static const nir_shader_compiler_options r300_fs_compiler_options = {
542 COMMON_NIR_OPTIONS,
543 .lower_fpow = true, /* POW is only in the VS */
544 .lower_sincos = true,
545 .has_fused_comp_and_csel = true,
546
547 /* No HW loops support, so set it equal to ALU instr max */
548 .max_unroll_iterations = 64,
549 };
550
551 static const void *
r300_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)552 r300_get_compiler_options(struct pipe_screen *pscreen,
553 enum pipe_shader_ir ir,
554 enum pipe_shader_type shader)
555 {
556 struct r300_screen* r300screen = r300_screen(pscreen);
557
558 assert(ir == PIPE_SHADER_IR_NIR);
559
560 if (r300screen->caps.is_r500) {
561 if (shader == PIPE_SHADER_VERTEX)
562 return &r500_vs_compiler_options;
563 else
564 return &r500_fs_compiler_options;
565 } else {
566 if (shader == PIPE_SHADER_VERTEX)
567 return &r300_vs_compiler_options;
568 else
569 return &r300_fs_compiler_options;
570 }
571 }
572
573 /**
574 * Whether the format matches:
575 * PIPE_FORMAT_?10?10?10?2_UNORM
576 */
577 static inline bool
util_format_is_rgba1010102_variant(const struct util_format_description * desc)578 util_format_is_rgba1010102_variant(const struct util_format_description *desc)
579 {
580 static const unsigned size[4] = {10, 10, 10, 2};
581 unsigned chan;
582
583 if (desc->block.width != 1 ||
584 desc->block.height != 1 ||
585 desc->block.bits != 32)
586 return false;
587
588 for (chan = 0; chan < 4; ++chan) {
589 if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
590 desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
591 return false;
592 if (desc->channel[chan].size != size[chan])
593 return false;
594 }
595
596 return true;
597 }
598
r300_is_blending_supported(struct r300_screen * rscreen,enum pipe_format format)599 static bool r300_is_blending_supported(struct r300_screen *rscreen,
600 enum pipe_format format)
601 {
602 int c;
603 const struct util_format_description *desc =
604 util_format_description(format);
605
606 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
607 return false;
608
609 c = util_format_get_first_non_void_channel(format);
610
611 /* RGBA16F */
612 if (rscreen->caps.is_r500 &&
613 desc->nr_channels == 4 &&
614 desc->channel[c].size == 16 &&
615 desc->channel[c].type == UTIL_FORMAT_TYPE_FLOAT)
616 return true;
617
618 if (desc->channel[c].normalized &&
619 desc->channel[c].type == UTIL_FORMAT_TYPE_UNSIGNED &&
620 desc->channel[c].size >= 4 &&
621 desc->channel[c].size <= 10) {
622 /* RGB10_A2, RGBA8, RGB5_A1, RGBA4, RGB565 */
623 if (desc->nr_channels >= 3)
624 return true;
625
626 if (format == PIPE_FORMAT_R8G8_UNORM)
627 return true;
628
629 /* R8, I8, L8, A8 */
630 if (desc->nr_channels == 1)
631 return true;
632 }
633
634 return false;
635 }
636
r300_is_format_supported(struct pipe_screen * screen,enum pipe_format format,enum pipe_texture_target target,unsigned sample_count,unsigned storage_sample_count,unsigned usage)637 static bool r300_is_format_supported(struct pipe_screen* screen,
638 enum pipe_format format,
639 enum pipe_texture_target target,
640 unsigned sample_count,
641 unsigned storage_sample_count,
642 unsigned usage)
643 {
644 uint32_t retval = 0;
645 bool is_r500 = r300_screen(screen)->caps.is_r500;
646 bool is_r400 = r300_screen(screen)->caps.is_r400;
647 bool is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
648 format == PIPE_FORMAT_R10G10B10X2_SNORM ||
649 format == PIPE_FORMAT_B10G10R10A2_UNORM ||
650 format == PIPE_FORMAT_B10G10R10X2_UNORM ||
651 format == PIPE_FORMAT_R10SG10SB10SA2U_NORM;
652 bool is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM ||
653 format == PIPE_FORMAT_RGTC1_SNORM ||
654 format == PIPE_FORMAT_LATC1_UNORM ||
655 format == PIPE_FORMAT_LATC1_SNORM;
656 bool is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM ||
657 format == PIPE_FORMAT_RGTC2_SNORM ||
658 format == PIPE_FORMAT_LATC2_UNORM ||
659 format == PIPE_FORMAT_LATC2_SNORM;
660 bool is_half_float = format == PIPE_FORMAT_R16_FLOAT ||
661 format == PIPE_FORMAT_R16G16_FLOAT ||
662 format == PIPE_FORMAT_R16G16B16_FLOAT ||
663 format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
664 format == PIPE_FORMAT_R16G16B16X16_FLOAT;
665 const struct util_format_description *desc;
666
667 if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
668 return false;
669
670 /* Check multisampling support. */
671 switch (sample_count) {
672 case 0:
673 case 1:
674 break;
675 case 2:
676 case 4:
677 case 6:
678 /* No texturing and scanout. */
679 if (usage & (PIPE_BIND_SAMPLER_VIEW |
680 PIPE_BIND_DISPLAY_TARGET |
681 PIPE_BIND_SCANOUT)) {
682 return false;
683 }
684
685 desc = util_format_description(format);
686
687 if (is_r500) {
688 /* Only allow depth/stencil, RGBA8, RGBA1010102, RGBA16F. */
689 if (!util_format_is_depth_or_stencil(format) &&
690 !util_format_is_rgba8_variant(desc) &&
691 !util_format_is_rgba1010102_variant(desc) &&
692 format != PIPE_FORMAT_R16G16B16A16_FLOAT &&
693 format != PIPE_FORMAT_R16G16B16X16_FLOAT) {
694 return false;
695 }
696 } else {
697 /* Only allow depth/stencil, RGBA8. */
698 if (!util_format_is_depth_or_stencil(format) &&
699 !util_format_is_rgba8_variant(desc)) {
700 return false;
701 }
702 }
703 break;
704 default:
705 return false;
706 }
707
708 /* Check sampler format support. */
709 if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
710 /* these two are broken for an unknown reason */
711 format != PIPE_FORMAT_R8G8B8X8_SNORM &&
712 format != PIPE_FORMAT_R16G16B16X16_SNORM &&
713 /* ATI1N is r5xx-only. */
714 (is_r500 || !is_ati1n) &&
715 /* ATI2N is supported on r4xx-r5xx. */
716 (is_r400 || is_r500 || !is_ati2n) &&
717 r300_is_sampler_format_supported(format)) {
718 retval |= PIPE_BIND_SAMPLER_VIEW;
719 }
720
721 /* Check colorbuffer format support. */
722 if ((usage & (PIPE_BIND_RENDER_TARGET |
723 PIPE_BIND_DISPLAY_TARGET |
724 PIPE_BIND_SCANOUT |
725 PIPE_BIND_SHARED |
726 PIPE_BIND_BLENDABLE)) &&
727 /* 2101010 cannot be rendered to on non-r5xx. */
728 (!is_color2101010 || is_r500) &&
729 r300_is_colorbuffer_format_supported(format)) {
730 retval |= usage &
731 (PIPE_BIND_RENDER_TARGET |
732 PIPE_BIND_DISPLAY_TARGET |
733 PIPE_BIND_SCANOUT |
734 PIPE_BIND_SHARED);
735
736 if (r300_is_blending_supported(r300_screen(screen), format)) {
737 retval |= usage & PIPE_BIND_BLENDABLE;
738 }
739 }
740
741 /* Check depth-stencil format support. */
742 if (usage & PIPE_BIND_DEPTH_STENCIL &&
743 r300_is_zs_format_supported(format)) {
744 retval |= PIPE_BIND_DEPTH_STENCIL;
745 }
746
747 /* Check vertex buffer format support. */
748 if (usage & PIPE_BIND_VERTEX_BUFFER) {
749 if (r300_screen(screen)->caps.has_tcl) {
750 /* Half float is supported on >= R400. */
751 if ((is_r400 || is_r500 || !is_half_float) &&
752 r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) {
753 retval |= PIPE_BIND_VERTEX_BUFFER;
754 }
755 } else {
756 /* SW TCL */
757 if (!util_format_is_pure_integer(format)) {
758 retval |= PIPE_BIND_VERTEX_BUFFER;
759 }
760 }
761 }
762
763 if (usage & PIPE_BIND_INDEX_BUFFER) {
764 if (format == PIPE_FORMAT_R8_UINT ||
765 format == PIPE_FORMAT_R16_UINT ||
766 format == PIPE_FORMAT_R32_UINT)
767 retval |= PIPE_BIND_INDEX_BUFFER;
768 }
769
770 return retval == usage;
771 }
772
r300_destroy_screen(struct pipe_screen * pscreen)773 static void r300_destroy_screen(struct pipe_screen* pscreen)
774 {
775 struct r300_screen* r300screen = r300_screen(pscreen);
776 struct radeon_winsys *rws = radeon_winsys(pscreen);
777
778 if (rws && !rws->unref(rws))
779 return;
780
781 mtx_destroy(&r300screen->cmask_mutex);
782 slab_destroy_parent(&r300screen->pool_transfers);
783
784 disk_cache_destroy(r300screen->disk_shader_cache);
785
786 if (rws)
787 rws->destroy(rws);
788
789 FREE(r300screen);
790 }
791
r300_fence_reference(struct pipe_screen * screen,struct pipe_fence_handle ** ptr,struct pipe_fence_handle * fence)792 static void r300_fence_reference(struct pipe_screen *screen,
793 struct pipe_fence_handle **ptr,
794 struct pipe_fence_handle *fence)
795 {
796 struct radeon_winsys *rws = r300_screen(screen)->rws;
797
798 rws->fence_reference(rws, ptr, fence);
799 }
800
r300_fence_finish(struct pipe_screen * screen,struct pipe_context * ctx,struct pipe_fence_handle * fence,uint64_t timeout)801 static bool r300_fence_finish(struct pipe_screen *screen,
802 struct pipe_context *ctx,
803 struct pipe_fence_handle *fence,
804 uint64_t timeout)
805 {
806 struct radeon_winsys *rws = r300_screen(screen)->rws;
807
808 return rws->fence_wait(rws, fence, timeout);
809 }
810
r300_screen_get_fd(struct pipe_screen * screen)811 static int r300_screen_get_fd(struct pipe_screen *screen)
812 {
813 struct radeon_winsys *rws = r300_screen(screen)->rws;
814
815 return rws->get_fd(rws);
816 }
817
r300_screen_create(struct radeon_winsys * rws,const struct pipe_screen_config * config)818 struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
819 const struct pipe_screen_config *config)
820 {
821 struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen);
822
823 if (!r300screen) {
824 FREE(r300screen);
825 return NULL;
826 }
827
828 rws->query_info(rws, &r300screen->info);
829
830 r300_init_debug(r300screen);
831 r300_parse_chipset(r300screen->info.pci_id, &r300screen->caps);
832
833 if (SCREEN_DBG_ON(r300screen, DBG_NO_ZMASK))
834 r300screen->caps.zmask_ram = 0;
835 if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ))
836 r300screen->caps.hiz_ram = 0;
837 if (SCREEN_DBG_ON(r300screen, DBG_NO_TCL))
838 r300screen->caps.has_tcl = false;
839
840 r300screen->rws = rws;
841 r300screen->screen.destroy = r300_destroy_screen;
842 r300screen->screen.get_name = r300_get_name;
843 r300screen->screen.get_vendor = r300_get_vendor;
844 r300screen->screen.get_compiler_options = r300_get_compiler_options;
845 r300screen->screen.finalize_nir = r300_finalize_nir;
846 r300screen->screen.get_device_vendor = r300_get_device_vendor;
847 r300screen->screen.get_disk_shader_cache = r300_get_disk_shader_cache;
848 r300screen->screen.get_screen_fd = r300_screen_get_fd;
849 r300screen->screen.get_param = r300_get_param;
850 r300screen->screen.get_shader_param = r300_get_shader_param;
851 r300screen->screen.get_paramf = r300_get_paramf;
852 r300screen->screen.get_video_param = r300_get_video_param;
853 r300screen->screen.is_format_supported = r300_is_format_supported;
854 r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
855 r300screen->screen.context_create = r300_create_context;
856 r300screen->screen.fence_reference = r300_fence_reference;
857 r300screen->screen.fence_finish = r300_fence_finish;
858
859 r300_init_screen_resource_functions(r300screen);
860
861 r300_disk_cache_create(r300screen);
862
863 slab_create_parent(&r300screen->pool_transfers, sizeof(struct pipe_transfer), 64);
864
865 (void) mtx_init(&r300screen->cmask_mutex, mtx_plain);
866
867 return &r300screen->screen;
868 }
869