/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <errno.h>
#include <xf86drm.h>
#include "drm-uapi/nouveau_drm.h"
#include "util/format/u_format.h"
#include "util/format/u_format_s3tc.h"
#include "util/u_screen.h"
#include "pipe/p_screen.h"

#include "nv50_ir_driver.h"

#include "nv50/nv50_context.h"
#include "nv50/nv50_screen.h"

#include "nouveau_vp3_video.h"

#include "nv_object.xml.h"

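/* Warp/thread counts used below when sizing the per-thread local memory
 * (l[]) and call-stack buffers.
 */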
/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
#define LOCAL_WARPS_ALLOC 32
/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
#define STACK_WARPS_ALLOC 32

#define THREADS_IN_WARP 32

static bool
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                                enum pipe_format format,
                                enum pipe_texture_target target,
                                unsigned sample_count,
                                unsigned storage_sample_count,
                                unsigned bindings)
{
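   /* 0x117 has bits 0, 1, 2, 4 and 8 set, i.e. bit N marks sample_count N
    * as an accepted multisample level.
    */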
   if (sample_count > 8)
      return false;
   if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
      return false;
   if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
      return false;

   if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
      return false;

   /* Short-circuit the rest of the logic -- this is used by the gallium frontend
    * to determine valid MS levels in a no-attachments scenario.
    */
   if (format == PIPE_FORMAT_NONE && bindings & PIPE_BIND_RENDER_TARGET)
      return true;

   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
      if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
         return false;
      break;
   default:
      break;
   }

   if (bindings & PIPE_BIND_LINEAR)
      if (util_format_is_depth_or_stencil(format) ||
          (target != PIPE_TEXTURE_1D &&
           target != PIPE_TEXTURE_2D &&
           target != PIPE_TEXTURE_RECT) ||
          sample_count > 1)
         return false;

   /* shared is always supported */
   bindings &= ~(PIPE_BIND_LINEAR |
                 PIPE_BIND_SHARED);

   if (bindings & PIPE_BIND_INDEX_BUFFER) {
      if (format != PIPE_FORMAT_R8_UINT &&
          format != PIPE_FORMAT_R16_UINT &&
          format != PIPE_FORMAT_R32_UINT)
         return false;
      bindings &= ~PIPE_BIND_INDEX_BUFFER;
   }

   return (( nv50_format_table[format].usage |
             nv50_vertex_format[format].usage) & bindings) == bindings;
}

static int
nv50_screen_get_shader_param(struct pipe_screen *pscreen,
                             enum pipe_shader_type shader,
                             enum pipe_shader_cap param)
{
   switch (shader) {
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_GEOMETRY:
   case PIPE_SHADER_FRAGMENT:
   case PIPE_SHADER_COMPUTE:
      break;
   default:
      return 0;
   }

   switch (param) {
   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
      return 16384;
   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
      return 4;
   case PIPE_SHADER_CAP_MAX_INPUTS:
      if (shader == PIPE_SHADER_VERTEX)
         return 32;
      return 15;
   case PIPE_SHADER_CAP_MAX_OUTPUTS:
      return 16;
   case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
      return 65536;
   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
      return NV50_MAX_PIPE_CONSTBUFS;
   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
      return 1;
   case PIPE_SHADER_CAP_MAX_TEMPS:
      return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
   case PIPE_SHADER_CAP_CONT_SUPPORTED:
      return 1;
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
      return 1;
   case PIPE_SHADER_CAP_INT64_ATOMICS:
   case PIPE_SHADER_CAP_FP16:
   case PIPE_SHADER_CAP_FP16_DERIVATIVES:
   case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
   case PIPE_SHADER_CAP_INT16:
   case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
   case PIPE_SHADER_CAP_SUBROUTINES:
      return 0; /* please inline, or provide function declarations */
   case PIPE_SHADER_CAP_INTEGERS:
      return 1;
   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
   /* The chip could handle more sampler views than samplers */
   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
      return MIN2(16, PIPE_MAX_SAMPLERS);
   case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
      return shader == PIPE_SHADER_COMPUTE ? NV50_MAX_GLOBALS - 1 : 0;
   case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
      return shader == PIPE_SHADER_COMPUTE ? NV50_MAX_GLOBALS - 1 : 0;
   case PIPE_SHADER_CAP_SUPPORTED_IRS:
      return 1 << PIPE_SHADER_IR_NIR;
   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
      return 0;
   default:
      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
      return 0;
   }
}

static int
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
                              enum pipe_shader_ir ir_type,
                              enum pipe_compute_cap param, void *data)
{
   struct nv50_screen *screen = nv50_screen(pscreen);
   struct nouveau_device *dev = screen->base.device;

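/* RET(x) copies the compound-literal array x into 'data' (when non-NULL) and
 * returns its size in bytes, so a caller can first query the required size by
 * passing data == NULL and then fetch the actual value.
 */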
#define RET(x) do {                  \
   if (data)                         \
      memcpy(data, x, sizeof(x));    \
   return sizeof(x);                 \
} while (0)

   switch (param) {
   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      RET((uint64_t []) { 3 });
   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      RET(((uint64_t []) { 65535, 65535, 65535 }));
   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      RET(((uint64_t []) { 512, 512, 64 }));
   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      RET((uint64_t []) { 512 });
   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
      RET((uint64_t []) { nouveau_device_get_global_mem_size(dev) });
   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
      RET((uint64_t []) { 16 << 10 });
   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
      RET((uint64_t []) { 16 << 10 });
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
      RET((uint64_t []) { 4096 });
   case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
      RET((uint32_t []) { 32 });
   case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
      RET((uint32_t []) { 0 });
   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
      RET((uint64_t []) { nouveau_device_get_global_mem_size(dev) });
   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
      RET((uint32_t []) { 0 });
   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
      RET((uint32_t []) { screen->mp_count });
   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
      RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
      RET((uint32_t []) { 32 });
   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
      RET((uint64_t []) { 0 });
   default:
      return 0;
   }

#undef RET
}

static void
nv50_init_screen_caps(struct nv50_screen *screen)
{
   struct pipe_caps *caps = (struct pipe_caps *)&screen->base.base.caps;

   u_init_pipe_screen_caps(&screen->base.base, 1);

   const uint16_t class_3d = screen->base.class_3d;
   struct nouveau_device *dev = screen->base.device;

   /* Non-boolean caps */
   caps->max_texture_2d_size = 8192;
   caps->max_texture_3d_levels = 12;
   caps->max_texture_cube_levels = 14;
   caps->max_texture_array_layers = 512;
   caps->min_texture_gather_offset =
   caps->min_texel_offset = -8;
   caps->max_texture_gather_offset =
   caps->max_texel_offset = 7;
   caps->max_texel_buffer_elements = 128 * 1024 * 1024;
   caps->glsl_feature_level = 330;
   caps->glsl_feature_level_compatibility = 330;
   caps->essl_feature_level = class_3d >= NVA3_3D_CLASS ? 310 : 300;
   caps->max_render_targets = 8;
   caps->max_dual_source_render_targets = 1;
   caps->max_combined_shader_output_resources = NV50_MAX_GLOBALS - 1;
   caps->viewport_subpixel_bits =
   caps->rasterizer_subpixel_bits = 8;
   caps->max_stream_output_buffers = 4;
   caps->max_stream_output_interleaved_components = 64;
   caps->max_stream_output_separate_components = 4;
   caps->max_geometry_output_vertices =
   caps->max_geometry_total_output_components = 1024;
   caps->max_vertex_streams = 1;
   caps->max_gs_invocations = 0;
   caps->max_shader_buffer_size = 1 << 27;
   caps->max_vertex_attrib_stride = 2048;
   caps->max_vertex_element_src_offset = 2047;
   caps->constant_buffer_offset_alignment = 256;
   caps->texture_buffer_offset_alignment = 16; /* 256 for binding as RT, but that's not possible in GL */
   caps->shader_buffer_offset_alignment = 256; /* the access limit is aligned to 256 */
   caps->min_map_buffer_alignment = NOUVEAU_MIN_BUFFER_MAP_ALIGN;
   caps->max_viewports = NV50_MAX_VIEWPORTS;
   caps->texture_border_color_quirk = PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
   caps->endianness = PIPE_ENDIAN_LITTLE;
   caps->max_texture_gather_components = (class_3d >= NVA3_3D_CLASS) ? 4 : 0;
   caps->max_window_rectangles = NV50_MAX_WINDOW_RECTANGLES;
   caps->max_texture_upload_memory_budget = 16 * 1024 * 1024;
   caps->max_varyings = 15;
   caps->max_vertex_buffers = 16;
   caps->gl_begin_end_buffer_size = 512 * 1024; /* TODO: Investigate tuning this */
   caps->max_texture_mb = 0; /* TODO: use 1/2 of VRAM for this? */

   caps->supported_prim_modes_with_restart =
   caps->supported_prim_modes = BITFIELD_MASK(MESA_PRIM_COUNT);

   /* supported caps */
   caps->texture_mirror_clamp = true;
   caps->texture_mirror_clamp_to_edge = true;
   caps->texture_swizzle = true;
   caps->npot_textures = true;
   caps->mixed_framebuffer_sizes = true;
   caps->mixed_color_depth_bits = true;
   caps->anisotropic_filter = true;
   caps->texture_buffer_objects = true;
   caps->depth_clip_disable = true;
   caps->fragment_shader_texture_lod = true;
   caps->fragment_shader_derivatives = true;
   caps->fragment_color_clamped = true;
   caps->vertex_color_unclamped = true;
   caps->vertex_color_clamped = true;
   caps->query_timestamp = true;
   caps->query_time_elapsed = true;
   caps->occlusion_query = true;
   caps->blend_equation_separate = true;
   caps->indep_blend_enable = true;
   caps->fs_coord_origin_upper_left = true;
   caps->fs_coord_pixel_center_half_integer = true;
   caps->primitive_restart = true;
   caps->primitive_restart_fixed_index = true;
   caps->vs_instanceid = true;
   caps->vertex_element_instance_divisor = true;
   caps->conditional_render = true;
   caps->texture_barrier = true;
   caps->quads_follow_provoking_vertex_convention = true;
   caps->start_instance = true;
   caps->user_vertex_buffers = true;
   caps->texture_multisample = true;
   caps->fs_fine_derivative = true;
   caps->sampler_view_target = true;
   caps->conditional_render_inverted = true;
   caps->clip_halfz = true;
   caps->memobj = true;
   caps->polygon_offset_clamp = true;
   caps->query_pipeline_statistics = true;
   caps->texture_float_linear = true;
   caps->texture_half_float_linear = true;
   caps->depth_bounds_test = true;
   caps->texture_query_samples = true;
   caps->copy_between_compressed_and_plain_formats = true;
   caps->fs_face_is_integer_sysval = true;
   caps->invalidate_buffer = true;
   caps->string_marker = true;
   caps->cull_distance = true;
   caps->shader_array_components = true;
   caps->legacy_math_rules = true;
   caps->tgsi_tex_txf_lz = true;
   caps->shader_clock = true;
   caps->can_bind_const_buffer_as_vertex = true;
   caps->tgsi_div = true;
   caps->clear_scissored = true;
   caps->framebuffer_no_attachment = true;
   caps->compute = true;
   caps->query_memory_info = true;

   /* nvc0 has fixed function alpha test support, but nv50 doesn't. If we
    * don't have it, then the frontend will lower it for us.
    */
   caps->alpha_test = class_3d >= NVC0_3D_CLASS;

   caps->texture_transfer_modes = PIPE_TEXTURE_TRANSFER_BLIT;
   caps->seamless_cube_map = true; /* class_3d >= NVA0_3D_CLASS; */
   /* supported on nva0+ */
   caps->stream_output_pause_resume = class_3d >= NVA0_3D_CLASS;
   /* supported on nva3+ */
   caps->cube_map_array =
   caps->indep_blend_func =
   caps->texture_query_lod =
   caps->sample_shading =
   caps->force_persample_interp = class_3d >= NVA3_3D_CLASS;

   caps->pci_group = dev->info.pci.domain;
   caps->pci_bus = dev->info.pci.bus;
   caps->pci_device = dev->info.pci.dev;
   caps->pci_function = dev->info.pci.func;

   caps->multisample_z_resolve = false; /* potentially supported on some hw */
   caps->integer_multiply_32x16 = false; /* could be done */
   caps->map_unsynchronized_thread_safe = false; /* when we fix MT stuff */
   caps->nir_images_as_deref = false;
   caps->hardware_gl_select = false;

   caps->vendor_id = 0x10de;
   caps->device_id = dev->info.device_id;
   caps->video_memory = dev->vram_size >> 20;
   caps->uma = screen->base.is_uma;

   caps->min_line_width =
   caps->min_line_width_aa =
   caps->min_point_size =
   caps->min_point_size_aa = 1;
   caps->point_size_granularity =
   caps->line_width_granularity = 0.1;
   caps->max_line_width =
   caps->max_line_width_aa = 10.0f;
   caps->max_point_size =
   caps->max_point_size_aa = 64.0f;
   caps->max_texture_anisotropy = 16.0f;
   caps->max_texture_lod_bias = 15.0f;
}

static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
   struct nv50_screen *screen = nv50_screen(pscreen);

   if (!screen->base.initialized)
      return;

   if (screen->blitter)
      nv50_blitter_destroy(screen);
   if (screen->pm.prog) {
      screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
      nv50_program_destroy(NULL, screen->pm.prog);
      FREE(screen->pm.prog);
   }

   nouveau_bo_ref(NULL, &screen->code);
   nouveau_bo_ref(NULL, &screen->tls_bo);
   nouveau_bo_ref(NULL, &screen->stack_bo);
   nouveau_bo_ref(NULL, &screen->txc);
   nouveau_bo_ref(NULL, &screen->uniforms);
   nouveau_bo_ref(NULL, &screen->fence.bo);

   nouveau_heap_destroy(&screen->vp_code_heap);
   nouveau_heap_destroy(&screen->gp_code_heap);
   nouveau_heap_destroy(&screen->fp_code_heap);

   FREE(screen->tic.entries);

   nouveau_object_del(&screen->tesla);
   nouveau_object_del(&screen->eng2d);
   nouveau_object_del(&screen->m2mf);
   nouveau_object_del(&screen->compute);
   nouveau_object_del(&screen->sync);

   nouveau_screen_fini(&screen->base);
   simple_mtx_destroy(&screen->state_lock);

   FREE(screen);
}

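/* Fences are based on an in-memory sequence counter: fence_emit uses the 3D
 * engine's QUERY_GET method to write the new sequence number into fence.bo,
 * and fence_update reads the last value written back through the persistent
 * CPU mapping of that buffer.
 */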
static void
nv50_screen_fence_emit(struct pipe_context *pcontext, u32 *sequence,
                       struct nouveau_bo *wait)
{
   struct nv50_context *nv50 = nv50_context(pcontext);
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nouveau_pushbuf_refn ref = { wait, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR };

   /* we need to do it after possible flush in MARK_RING */
   *sequence = ++screen->base.fence.sequence;

   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
   PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
   PUSH_DATAh(push, screen->fence.bo->offset);
   PUSH_DATA (push, screen->fence.bo->offset);
   PUSH_DATA (push, *sequence);
   PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
                    NV50_3D_QUERY_GET_UNK4 |
                    NV50_3D_QUERY_GET_UNIT_CROP |
                    NV50_3D_QUERY_GET_TYPE_QUERY |
                    NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
                    NV50_3D_QUERY_GET_SHORT);

   nouveau_pushbuf_refn(push, &ref, 1);
}

static u32
nv50_screen_fence_update(struct pipe_screen *pscreen)
{
   return nv50_screen(pscreen)->fence.map[0];
}


static void
nv50_screen_init_hwctx(struct nv50_screen *screen)
{
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   struct nv04_fifo *fifo;
   unsigned i;

   fifo = (struct nv04_fifo *)screen->base.channel->data;

   BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->m2mf->handle);
   BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);

   BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->eng2d->handle);
   BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_2D(OPERATION), 1);
   PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
   BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_2D(SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_2D(COND_MODE), 1);
   PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS);

   BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->tesla->handle);

   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
   PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);

   BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
   PUSH_DATA (push, screen->sync->handle);
   BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
   for (i = 0; i < 11; ++i)
      PUSH_DATA(push, fifo->vram);
   BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
   for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
      PUSH_DATA(push, fifo->vram);

   BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
   PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
   PUSH_DATA (push, 0xf);

   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
      BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
      PUSH_DATA (push, 0x18);
   }


   BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);
   PUSH_DATA(push, screen->base.drm->version >= 0x01000101);

   BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);
   for (i = 0; i < 8; ++i)
      PUSH_DATA(push, screen->base.drm->version >= 0x01000101);

   BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
   PUSH_DATA (push, 1);

   BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
   PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
   PUSH_DATA (push, 1);

   if (screen->tesla->oclass >= NVA0_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0x3f);

   BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (NV50_SHADER_STAGE_VERTEX << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (NV50_SHADER_STAGE_VERTEX << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (NV50_SHADER_STAGE_FRAGMENT << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (NV50_SHADER_STAGE_FRAGMENT << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (NV50_SHADER_STAGE_GEOMETRY << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (NV50_SHADER_STAGE_GEOMETRY << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->tls_bo->offset);
   PUSH_DATA (push, screen->tls_bo->offset);
   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));

   BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   PUSH_DATA (push, 4);

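   /* Define constant buffers in 64 KiB slices of the uniforms BO: slices
    * 0/1/2 become NV50_CB_PVP/PGP/PFP and slice 4 becomes the driver's
    * NV50_CB_AUX buffer.
    */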
   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (4 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (4 << 16));
   PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff));

   BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);

   /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
   PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX);
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
   PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);

   /* set the membar offset */
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
   PUSH_DATA (push, (NV50_CB_AUX_MEMBAR_OFFSET << (8 - 2)) | NV50_CB_AUX);
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 1);
   PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_MEMBAR_OFFSET);

   nv50_upload_ms_info(push);

   /* max TIC (bits 4:8) & TSC bindings, per program type */
   for (i = 0; i < NV50_MAX_3D_SHADER_STAGES; ++i) {
      BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
      PUSH_DATA (push, 0x54);
   }

   BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
   PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
   BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
   for (i = 0; i < 8 * 2; ++i)
      PUSH_DATA(push, 0);
   BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
   PUSH_DATA (push, 0);

   BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
   PUSH_DATA (push, 1);
   for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
      BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, 0.0f);
      PUSH_DATAf(push, 1.0f);
      BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(i)), 2);
      PUSH_DATA (push, 8192 << 16);
      PUSH_DATA (push, 8192 << 16);
   }

   BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
#ifdef NV50_SCISSORS_CLIPPING
   PUSH_DATA (push, 0x0000);
#else
   PUSH_DATA (push, 0x1080);
#endif

   BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
   PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);

   /* We use scissors instead of exact view volume clipping,
    * so they're always enabled.
    */
   for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
      BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(i)), 3);
      PUSH_DATA (push, 1);
      PUSH_DATA (push, 8192 << 16);
      PUSH_DATA (push, 8192 << 16);
   }

   BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
   PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
   BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
   PUSH_DATA (push, 0x11111111);
   BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
   PUSH_DATA (push, 1);

   BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
   PUSH_DATA (push, 0);
   if (screen->base.class_3d >= NV84_3D_CLASS) {
      BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, NV50_3D(UNK0FDC), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(UNK19C0), 1);
   PUSH_DATA (push, 1);
}

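/* Allocate the buffer backing per-thread local memory (l[]). The size is the
 * per-thread temp space rounded up to a power of two, multiplied by the
 * number of threads provisioned here: THREADS_IN_WARP threads per warp,
 * LOCAL_WARPS_ALLOC warps per MP, MPsInTP MPs per TP, and the TP count
 * rounded up to a power of two.
 */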
static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
                          uint64_t *tls_size)
{
   struct nouveau_device *dev = screen->base.device;
   int ret;

   assert(tls_space % ONE_TEMP_SIZE == 0);
   screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
         ONE_TEMP_SIZE;
   if (nouveau_mesa_debug)
      debug_printf("allocating space for %u temps\n",
                   util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
   *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
         screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
                        *tls_size, NULL, &screen->tls_bo);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
      return ret;
   }

   return 0;
}

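/* Grow the TLS buffer when a shader needs more per-thread temp space than is
 * currently allocated. Returns a negative error code on failure, 0 if the
 * current allocation already suffices, and 1 if a new, larger buffer was
 * allocated and programmed.
 */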
int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
{
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   int ret;
   uint64_t tls_size;

   if (tls_space < screen->cur_tls_space)
      return 0;
   if (tls_space > screen->max_tls_space) {
      /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
       * LOCAL_WARPS_NO_CLAMP) */
      NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
                  (unsigned)(tls_space / ONE_TEMP_SIZE),
                  (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
      return -ENOMEM;
   }

   nouveau_bo_ref(NULL, &screen->tls_bo);
   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
   if (ret)
      return ret;

   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->tls_bo->offset);
   PUSH_DATA (push, screen->tls_bo->offset);
   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));

   return 1;
}

static const void *
nv50_screen_get_compiler_options(struct pipe_screen *pscreen,
                                 enum pipe_shader_ir ir,
                                 enum pipe_shader_type shader)
{
   if (ir == PIPE_SHADER_IR_NIR)
      return nv50_ir_nir_shader_compiler_options(NVISA_G80_CHIPSET, shader);
   return NULL;
}

struct nouveau_screen *
nv50_screen_create(struct nouveau_device *dev)
{
   struct nv50_screen *screen;
   struct pipe_screen *pscreen;
   struct nouveau_object *chan;
   uint64_t value;
   uint32_t tesla_class;
   unsigned stack_size;
   int ret;

   screen = CALLOC_STRUCT(nv50_screen);
   if (!screen)
      return NULL;
   pscreen = &screen->base.base;
   pscreen->destroy = nv50_screen_destroy;

   simple_mtx_init(&screen->state_lock, mtx_plain);
   ret = nouveau_screen_init(&screen->base, dev);
   if (ret) {
      NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
      goto fail;
   }

   /* TODO: Prevent FIFO prefetch before transfer of index buffers and
    * admit them to VRAM.
    */
   screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
      PIPE_BIND_VERTEX_BUFFER;
   screen->base.sysmem_bindings |=
      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;

   screen->base.pushbuf->rsvd_kick = 5;

   chan = screen->base.channel;

   pscreen->context_create = nv50_create;
   pscreen->is_format_supported = nv50_screen_is_format_supported;
   pscreen->get_shader_param = nv50_screen_get_shader_param;
   pscreen->get_compute_param = nv50_screen_get_compute_param;
   pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
   pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;

   /* nir stuff */
   pscreen->get_compiler_options = nv50_screen_get_compiler_options;

   nv50_screen_init_resource_functions(pscreen);

   if (screen->base.device->chipset < 0x84 ||
       debug_get_bool_option("NOUVEAU_PMPEG", false)) {
      /* PMPEG */
      nouveau_screen_init_vdec(&screen->base);
   } else if (screen->base.device->chipset < 0x98 ||
              screen->base.device->chipset == 0xa0) {
      /* VP2 */
      screen->base.base.get_video_param = nv84_screen_get_video_param;
      screen->base.base.is_video_format_supported = nv84_screen_video_supported;
   } else {
      /* VP3/4 */
      screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
      screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
   }

   ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
                        NULL, &screen->fence.bo);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
      goto fail;
   }

   BO_MAP(&screen->base, screen->fence.bo, 0, NULL);
   screen->fence.map = screen->fence.bo->map;
   screen->base.fence.emit = nv50_screen_fence_emit;
   screen->base.fence.update = nv50_screen_fence_update;

   ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
                            &(struct nv04_notify){ .length = 32 },
                            sizeof(struct nv04_notify), &screen->sync);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
      goto fail;
   }

   ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
                            NULL, 0, &screen->m2mf);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
      goto fail;
   }

   ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
                            NULL, 0, &screen->eng2d);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
      goto fail;
   }

   switch (dev->chipset & 0xf0) {
   case 0x50:
      tesla_class = NV50_3D_CLASS;
      break;
   case 0x80:
   case 0x90:
      tesla_class = NV84_3D_CLASS;
      break;
   case 0xa0:
      switch (dev->chipset) {
      case 0xa0:
      case 0xaa:
      case 0xac:
         tesla_class = NVA0_3D_CLASS;
         break;
      case 0xaf:
         tesla_class = NVAF_3D_CLASS;
         break;
      default:
         tesla_class = NVA3_3D_CLASS;
         break;
      }
      break;
   default:
      NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
      goto fail;
   }
   screen->base.class_3d = tesla_class;

   nv50_init_screen_caps(screen);

   ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
                            NULL, 0, &screen->tesla);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
      goto fail;
   }

   /* This over-allocates by a page. The GP, which would execute at the end of
    * the last page, would trigger faults. The going theory is that it
    * prefetches up to a certain amount.
    */
   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
                        (3 << NV50_CODE_BO_SIZE_LOG2) + 0x1000,
                        NULL, &screen->code);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
      goto fail;
   }

   nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
   nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
   nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);

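   /* NOUVEAU_GETPARAM_GRAPH_UNITS is decoded below on the assumption that
    * the low 16 bits hold the mask of enabled TPs and bits 24-27 the mask
    * of MPs per TP.
    */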
   nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);

   screen->TPs = util_bitcount(value & 0xffff);
   screen->MPsInTP = util_bitcount(value & 0x0f000000);

   screen->mp_count = screen->TPs * screen->MPsInTP;

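   /* Size the control-flow stack buffer for every warp that can be resident:
    * STACK_WARPS_ALLOC warps per MP, MPsInTP MPs per TP, and the TP count
    * rounded up to a power of two; the 64 * 8 factor is assumed to be the
    * per-warp stack footprint in bytes.
    */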
   stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
         STACK_WARPS_ALLOC * 64 * 8;

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
                        &screen->stack_bo);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
      goto fail;
   }

   uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
         screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP *
         ONE_TEMP_SIZE;
   screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
   screen->max_tls_space /= 2; /* half of vram */

   /* hw can address max 64 KiB */
   screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);

   uint64_t tls_size;
   unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
   ret = nv50_tls_alloc(screen, tls_space, &tls_size);
   if (ret)
      goto fail;

   if (nouveau_mesa_debug)
      debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
                   screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 5 << 16, NULL,
                        &screen->uniforms);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
      goto fail;
   }

   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
                        &screen->txc);
   if (ret) {
      NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
      goto fail;
   }

   screen->tic.entries = CALLOC(4096, sizeof(void *));
   screen->tsc.entries = screen->tic.entries + 2048;

   if (!nv50_blitter_create(screen))
      goto fail;

   nv50_screen_init_hwctx(screen);

   ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
   if (ret) {
      NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
      goto fail;
   }

   // submit all initial state
   PUSH_KICK(screen->base.pushbuf);

   return &screen->base;

fail:
   screen->base.base.context_create = NULL;
   return &screen->base;
}

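/* TIC/TSC slot allocators: walk the entry array round-robin, skipping slots
 * that are currently locked (still in use), and evict the previous owner of
 * the chosen slot by invalidating its id.
 */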
int
nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
{
   int i = screen->tic.next;

   while (screen->tic.lock[i / 32] & (1 << (i % 32)))
      i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);

   screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);

   if (screen->tic.entries[i])
      nv50_tic_entry(screen->tic.entries[i])->id = -1;

   screen->tic.entries[i] = entry;
   return i;
}

int
nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
{
   int i = screen->tsc.next;

   while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
      i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);

   screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);

   if (screen->tsc.entries[i])
      nv50_tsc_entry(screen->tsc.entries[i])->id = -1;

   screen->tsc.entries[i] = entry;
   return i;
}