• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2010 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <errno.h>
24 #include <xf86drm.h>
25 #include "drm-uapi/nouveau_drm.h"
26 #include "util/format/u_format.h"
27 #include "util/format/u_format_s3tc.h"
28 #include "util/u_screen.h"
29 #include "pipe/p_screen.h"
30 
31 #include "nv50_ir_driver.h"
32 
33 #include "nv50/nv50_context.h"
34 #include "nv50/nv50_screen.h"
35 
36 #include "nouveau_vp3_video.h"
37 
38 #include "nv_object.xml.h"
39 
40 /* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */
41 #define LOCAL_WARPS_ALLOC 32
42 /* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */
43 #define STACK_WARPS_ALLOC 32
44 
45 #define THREADS_IN_WARP 32
46 
/* Report whether @format can be used for @target with @sample_count samples
 * under the requested @bindings on this screen.  Ends with a lookup in the
 * texture/vertex format usage tables, after masking out bindings that are
 * either always supported or validated explicitly above.
 */
static bool
nv50_screen_is_format_supported(struct pipe_screen *pscreen,
                                enum pipe_format format,
                                enum pipe_texture_target target,
                                unsigned sample_count,
                                unsigned storage_sample_count,
                                unsigned bindings)
{
   if (sample_count > 8)
      return false;
   if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
      return false;
   /* 8x MSAA is not supported for "wide" (>= 128-bit per texel) formats. */
   if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)
      return false;

   /* Color and storage sample counts must match (treating 0 and 1 alike). */
   if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))
      return false;

   /* Short-circuit the rest of the logic -- this is used by the gallium frontend
    * to determine valid MS levels in a no-attachments scenario.
    */
   if (format == PIPE_FORMAT_NONE && bindings & PIPE_BIND_RENDER_TARGET)
      return true;

   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
      /* Z16 requires the second-generation tesla class (NVA0+). */
      if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)
         return false;
      break;
   default:
      break;
   }

   /* Linear layout is only valid for single-sampled, non-depth/stencil
    * 1D/2D/RECT textures. */
   if (bindings & PIPE_BIND_LINEAR)
      if (util_format_is_depth_or_stencil(format) ||
          (target != PIPE_TEXTURE_1D &&
           target != PIPE_TEXTURE_2D &&
           target != PIPE_TEXTURE_RECT) ||
          sample_count > 1)
         return false;

   /* shared is always supported */
   bindings &= ~(PIPE_BIND_LINEAR |
                 PIPE_BIND_SHARED);

   /* Index buffers must be 8/16/32-bit unsigned; the usage tables below do
    * not track this binding, so validate and strip it here. */
   if (bindings & PIPE_BIND_INDEX_BUFFER) {
      if (format != PIPE_FORMAT_R8_UINT &&
          format != PIPE_FORMAT_R16_UINT &&
          format != PIPE_FORMAT_R32_UINT)
         return false;
      bindings &= ~PIPE_BIND_INDEX_BUFFER;
   }

   /* Every remaining requested binding must be covered by the combined
    * texture and vertex format usage masks. */
   return (( nv50_format_table[format].usage |
            nv50_vertex_format[format].usage) & bindings) == bindings;
}
103 
104 static int
nv50_screen_get_shader_param(struct pipe_screen * pscreen,enum pipe_shader_type shader,enum pipe_shader_cap param)105 nv50_screen_get_shader_param(struct pipe_screen *pscreen,
106                              enum pipe_shader_type shader,
107                              enum pipe_shader_cap param)
108 {
109    switch (shader) {
110    case PIPE_SHADER_VERTEX:
111    case PIPE_SHADER_GEOMETRY:
112    case PIPE_SHADER_FRAGMENT:
113    case PIPE_SHADER_COMPUTE:
114       break;
115    default:
116       return 0;
117    }
118 
119    switch (param) {
120    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
121    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
122    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
123    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
124       return 16384;
125    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
126       return 4;
127    case PIPE_SHADER_CAP_MAX_INPUTS:
128       if (shader == PIPE_SHADER_VERTEX)
129          return 32;
130       return 15;
131    case PIPE_SHADER_CAP_MAX_OUTPUTS:
132       return 16;
133    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
134       return 65536;
135    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
136       return NV50_MAX_PIPE_CONSTBUFS;
137    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
138    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
139       return 1;
140    case PIPE_SHADER_CAP_MAX_TEMPS:
141       return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;
142    case PIPE_SHADER_CAP_CONT_SUPPORTED:
143       return 1;
144    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
145       return 1;
146    case PIPE_SHADER_CAP_INT64_ATOMICS:
147    case PIPE_SHADER_CAP_FP16:
148    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
149    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
150    case PIPE_SHADER_CAP_INT16:
151    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
152    case PIPE_SHADER_CAP_SUBROUTINES:
153       return 0; /* please inline, or provide function declarations */
154    case PIPE_SHADER_CAP_INTEGERS:
155       return 1;
156    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
157       /* The chip could handle more sampler views than samplers */
158    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
159       return MIN2(16, PIPE_MAX_SAMPLERS);
160    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
161       return shader == PIPE_SHADER_COMPUTE ? NV50_MAX_GLOBALS - 1 : 0;
162    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
163       return shader == PIPE_SHADER_COMPUTE ? NV50_MAX_GLOBALS - 1 : 0;
164    case PIPE_SHADER_CAP_SUPPORTED_IRS:
165       return 1 << PIPE_SHADER_IR_NIR;
166    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
167    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
168    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
169       return 0;
170    default:
171       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
172       return 0;
173    }
174 }
175 
/* Answer compute capability queries.  For a recognized @param, copy the
 * value into @data (if non-NULL) and return its size in bytes; return 0
 * for unknown params.
 */
static int
nv50_screen_get_compute_param(struct pipe_screen *pscreen,
                              enum pipe_shader_ir ir_type,
                              enum pipe_compute_cap param, void *data)
{
   struct nv50_screen *screen = nv50_screen(pscreen);
   struct nouveau_device *dev = screen->base.device;

/* Copy a compound-literal array into the caller's buffer and report its
 * size; a NULL @data is a size-only query. */
#define RET(x) do {                  \
   if (data)                         \
      memcpy(data, x, sizeof(x));    \
   return sizeof(x);                 \
} while (0)

   switch (param) {
   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      RET((uint64_t []) { 3 });
   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      RET(((uint64_t []) { 65535, 65535, 65535 }));
   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      RET(((uint64_t []) { 512, 512, 64 }));
   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      RET((uint64_t []) { 512 });
   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */
      RET((uint64_t []) { nouveau_device_get_global_mem_size(dev) });
   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
      RET((uint64_t []) { 16 << 10 });
   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
      RET((uint64_t []) { 16 << 10 });
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
      RET((uint64_t []) { 4096 });
   case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
      RET((uint32_t []) { 32 });
   case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
      RET((uint32_t []) { 0 });
   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
      RET((uint64_t []) { nouveau_device_get_global_mem_size(dev) });
   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
      RET((uint32_t []) { 0 });
   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
      RET((uint32_t []) { screen->mp_count });
   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
      RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */
   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
      RET((uint32_t []) { 32 });
   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
      RET((uint64_t []) { 0 });
   default:
      return 0;
   }

#undef RET
}
229 
/* Populate the pipe_caps table for this screen.  Starts from the common
 * defaults (u_init_pipe_screen_caps) and then overrides every cap the nv50
 * family cares about; a few caps depend on the bound 3D class (class_3d).
 */
static void
nv50_init_screen_caps(struct nv50_screen *screen)
{
   /* caps lives inside the pipe_screen; cast away const to fill it in. */
   struct pipe_caps *caps = (struct pipe_caps *)&screen->base.base.caps;

   u_init_pipe_screen_caps(&screen->base.base, 1);

   const uint16_t class_3d = screen->base.class_3d;
   struct nouveau_device *dev = screen->base.device;

   /* Non-boolean caps */
   caps->max_texture_2d_size = 8192;
   caps->max_texture_3d_levels = 12;
   caps->max_texture_cube_levels = 14;
   caps->max_texture_array_layers = 512;
   caps->min_texture_gather_offset =
   caps->min_texel_offset = -8;
   caps->max_texture_gather_offset =
   caps->max_texel_offset = 7;
   caps->max_texel_buffer_elements = 128 * 1024 * 1024;
   caps->glsl_feature_level = 330;
   caps->glsl_feature_level_compatibility = 330;
   caps->essl_feature_level = class_3d >= NVA3_3D_CLASS ? 310 : 300;
   caps->max_render_targets = 8;
   caps->max_dual_source_render_targets = 1;
   caps->max_combined_shader_output_resources = NV50_MAX_GLOBALS - 1;
   caps->viewport_subpixel_bits =
   caps->rasterizer_subpixel_bits = 8;
   caps->max_stream_output_buffers = 4;
   caps->max_stream_output_interleaved_components = 64;
   caps->max_stream_output_separate_components = 4;
   caps->max_geometry_output_vertices =
   caps->max_geometry_total_output_components = 1024;
   caps->max_vertex_streams = 1;
   caps->max_gs_invocations = 0;
   caps->max_shader_buffer_size = 1 << 27;
   caps->max_vertex_attrib_stride = 2048;
   caps->max_vertex_element_src_offset = 2047;
   caps->constant_buffer_offset_alignment = 256;
   caps->texture_buffer_offset_alignment = 16; /* 256 for binding as RT, but that's not possible in GL */
   caps->shader_buffer_offset_alignment = 256; /* the access limit is aligned to 256 */
   caps->min_map_buffer_alignment = NOUVEAU_MIN_BUFFER_MAP_ALIGN;
   caps->max_viewports = NV50_MAX_VIEWPORTS;
   caps->texture_border_color_quirk = PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
   caps->endianness = PIPE_ENDIAN_LITTLE;
   caps->max_texture_gather_components = (class_3d >= NVA3_3D_CLASS) ? 4 : 0;
   caps->max_window_rectangles = NV50_MAX_WINDOW_RECTANGLES;
   caps->max_texture_upload_memory_budget = 16 * 1024 * 1024;
   caps->max_varyings = 15;
   caps->max_vertex_buffers = 16;
   caps->gl_begin_end_buffer_size = 512 * 1024; /* TODO: Investigate tuning this */
   caps->max_texture_mb = 0; /* TODO: use 1/2 of VRAM for this? */

   /* All primitive types can be used, with or without primitive restart. */
   caps->supported_prim_modes_with_restart =
   caps->supported_prim_modes = BITFIELD_MASK(MESA_PRIM_COUNT);

   /* supported caps */
   caps->texture_mirror_clamp = true;
   caps->texture_mirror_clamp_to_edge = true;
   caps->texture_swizzle = true;
   caps->npot_textures = true;
   caps->mixed_framebuffer_sizes = true;
   caps->mixed_color_depth_bits = true;
   caps->anisotropic_filter = true;
   caps->texture_buffer_objects = true;
   caps->depth_clip_disable = true;
   caps->fragment_shader_texture_lod = true;
   caps->fragment_shader_derivatives = true;
   caps->fragment_color_clamped = true;
   caps->vertex_color_unclamped = true;
   caps->vertex_color_clamped = true;
   caps->query_timestamp = true;
   caps->query_time_elapsed = true;
   caps->occlusion_query = true;
   caps->blend_equation_separate = true;
   caps->indep_blend_enable = true;
   caps->fs_coord_origin_upper_left = true;
   caps->fs_coord_pixel_center_half_integer = true;
   caps->primitive_restart = true;
   caps->primitive_restart_fixed_index = true;
   caps->vs_instanceid = true;
   caps->vertex_element_instance_divisor = true;
   caps->conditional_render = true;
   caps->texture_barrier = true;
   caps->quads_follow_provoking_vertex_convention = true;
   caps->start_instance = true;
   caps->user_vertex_buffers = true;
   caps->texture_multisample = true;
   caps->fs_fine_derivative = true;
   caps->sampler_view_target = true;
   caps->conditional_render_inverted = true;
   caps->clip_halfz = true;
   caps->memobj = true;
   caps->polygon_offset_clamp = true;
   caps->query_pipeline_statistics = true;
   caps->texture_float_linear = true;
   caps->texture_half_float_linear = true;
   caps->depth_bounds_test = true;
   caps->texture_query_samples = true;
   caps->copy_between_compressed_and_plain_formats = true;
   caps->fs_face_is_integer_sysval = true;
   caps->invalidate_buffer = true;
   caps->string_marker = true;
   caps->cull_distance = true;
   caps->shader_array_components = true;
   caps->legacy_math_rules = true;
   caps->tgsi_tex_txf_lz = true;
   caps->shader_clock = true;
   caps->can_bind_const_buffer_as_vertex = true;
   caps->tgsi_div = true;
   caps->clear_scissored = true;
   caps->framebuffer_no_attachment = true;
   caps->compute = true;
   caps->query_memory_info = true;

   /* nvc0 has fixed function alpha test support, but nv50 doesn't.  If we
    * don't have it, then the frontend will lower it for us.
    */
   caps->alpha_test = class_3d >= NVC0_3D_CLASS;

   caps->texture_transfer_modes = PIPE_TEXTURE_TRANSFER_BLIT;
   caps->seamless_cube_map = true; /* class_3d >= NVA0_3D_CLASS; */
   /* supported on nva0+ */
   caps->stream_output_pause_resume = class_3d >= NVA0_3D_CLASS;
   /* supported on nva3+ */
   caps->cube_map_array =
   caps->indep_blend_func =
   caps->texture_query_lod =
   caps->sample_shading =
   caps->force_persample_interp = class_3d >= NVA3_3D_CLASS;

   caps->pci_group = dev->info.pci.domain;
   caps->pci_bus = dev->info.pci.bus;
   caps->pci_device = dev->info.pci.dev;
   caps->pci_function = dev->info.pci.func;

   caps->multisample_z_resolve = false; /* potentially supported on some hw */
   caps->integer_multiply_32x16 = false; /* could be done */
   caps->map_unsynchronized_thread_safe = false; /* when we fix MT stuff */
   caps->nir_images_as_deref = false;
   caps->hardware_gl_select = false;

   caps->vendor_id = 0x10de;
   caps->device_id = dev->info.device_id;
   caps->video_memory = dev->vram_size >> 20; /* reported in MiB */
   caps->uma = screen->base.is_uma;

   caps->min_line_width =
   caps->min_line_width_aa =
   caps->min_point_size =
   caps->min_point_size_aa = 1;
   caps->point_size_granularity =
   caps->line_width_granularity = 0.1;
   caps->max_line_width =
   caps->max_line_width_aa = 10.0f;
   caps->max_point_size =
   caps->max_point_size_aa = 64.0f;
   caps->max_texture_anisotropy = 16.0f;
   caps->max_texture_lod_bias = 15.0f;
}
390 
/* Tear down the screen: release programs, BOs, code heaps, engine objects
 * and finally the base screen.  Safe to call on a screen whose base was
 * never fully initialized.
 */
static void
nv50_screen_destroy(struct pipe_screen *pscreen)
{
   struct nv50_screen *screen = nv50_screen(pscreen);

   /* nouveau_screen_init never completed; nothing below was set up. */
   if (!screen->base.initialized)
      return;

   if (screen->blitter)
      nv50_blitter_destroy(screen);
   if (screen->pm.prog) {
      screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
      nv50_program_destroy(NULL, screen->pm.prog);
      FREE(screen->pm.prog);
   }

   /* Drop our references on all screen-owned buffer objects. */
   nouveau_bo_ref(NULL, &screen->code);
   nouveau_bo_ref(NULL, &screen->tls_bo);
   nouveau_bo_ref(NULL, &screen->stack_bo);
   nouveau_bo_ref(NULL, &screen->txc);
   nouveau_bo_ref(NULL, &screen->uniforms);
   nouveau_bo_ref(NULL, &screen->fence.bo);

   nouveau_heap_destroy(&screen->vp_code_heap);
   nouveau_heap_destroy(&screen->gp_code_heap);
   nouveau_heap_destroy(&screen->fp_code_heap);

   FREE(screen->tic.entries);

   /* Delete the hardware engine/notifier objects. */
   nouveau_object_del(&screen->tesla);
   nouveau_object_del(&screen->eng2d);
   nouveau_object_del(&screen->m2mf);
   nouveau_object_del(&screen->compute);
   nouveau_object_del(&screen->sync);

   nouveau_screen_fini(&screen->base);
   simple_mtx_destroy(&screen->state_lock);

   FREE(screen);
}
431 
/* Emit a fence write into the command stream: bump the screen's fence
 * sequence number and queue a 3D QUERY_GET that writes it to the fence BO.
 * @wait is additionally referenced on the pushbuf so it stays alive until
 * this fence signals.
 */
static void
nv50_screen_fence_emit(struct pipe_context *pcontext, u32 *sequence,
                       struct nouveau_bo *wait)
{
   struct nv50_context *nv50 = nv50_context(pcontext);
   struct nv50_screen *screen = nv50->screen;
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   struct nouveau_pushbuf_refn ref = { wait, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR };

   /* we need to do it after possible flush in MARK_RING */
   *sequence = ++screen->base.fence.sequence;

   /* The 5 dwords below must fit without an implicit flush. */
   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
   PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
   PUSH_DATAh(push, screen->fence.bo->offset);
   PUSH_DATA (push, screen->fence.bo->offset);
   PUSH_DATA (push, *sequence);
   PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
                    NV50_3D_QUERY_GET_UNK4 |
                    NV50_3D_QUERY_GET_UNIT_CROP |
                    NV50_3D_QUERY_GET_TYPE_QUERY |
                    NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |
                    NV50_3D_QUERY_GET_SHORT);

   nouveau_pushbuf_refn(push, &ref, 1);
}
458 
459 static u32
nv50_screen_fence_update(struct pipe_screen * pscreen)460 nv50_screen_fence_update(struct pipe_screen *pscreen)
461 {
462    return nv50_screen(pscreen)->fence.map[0];
463 }
464 
/* Emit the one-time hardware context setup for a freshly created screen:
 * bind the M2MF/2D/3D engine objects to their subchannels and program all
 * state that never changes afterwards (DMA objects, shader code/TLS/stack
 * base addresses, constbuf definitions, TIC/TSC tables, default viewport
 * and scissor state).  The command order below matters; do not reorder.
 */
static void
nv50_screen_init_hwctx(struct nv50_screen *screen)
{
   struct nouveau_pushbuf *push = screen->base.pushbuf;
   struct nv04_fifo *fifo;
   unsigned i;

   fifo = (struct nv04_fifo *)screen->base.channel->data;

   /* Bind M2MF and point its notify/source/dest DMA objects at the sync
    * notifier and VRAM. */
   BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->m2mf->handle);
   BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);

   /* Bind the 2D engine and set its static defaults (plain SRCCOPY, no
    * clipping, no color key, overlapping copies allowed). */
   BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->eng2d->handle);
   BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);
   PUSH_DATA (push, screen->sync->handle);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   PUSH_DATA (push, fifo->vram);
   BEGIN_NV04(push, NV50_2D(OPERATION), 1);
   PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
   BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_2D(SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_2D(COND_MODE), 1);
   PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS);

   /* Bind the 3D (tesla) engine. */
   BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
   PUSH_DATA (push, screen->tesla->handle);

   BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
   PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);

   /* All 3D DMA objects (notify, zeta, color targets) go to VRAM. */
   BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);
   PUSH_DATA (push, screen->sync->handle);
   BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);
   for (i = 0; i < 11; ++i)
      PUSH_DATA(push, fifo->vram);
   BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);
   for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)
      PUSH_DATA(push, fifo->vram);

   BEGIN_NV04(push, NV50_3D(REG_MODE), 1);
   PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);
   BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);
   PUSH_DATA (push, 0xf);

   /* Shader watchdog kills runaway shaders; can be disabled via env var. */
   if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {
      BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);
      PUSH_DATA (push, 0x18);
   }

   /* Depth/color compression requires kernel support (drm >= 1.0.1.1). */
   BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);
   PUSH_DATA(push, screen->base.drm->version >= 0x01000101);

   BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);
   for (i = 0; i < 8; ++i)
      PUSH_DATA(push, screen->base.drm->version >= 0x01000101);

   BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
   PUSH_DATA (push, 1);

   /* Multisampling off by default; MS mode/state set per-framebuffer later. */
   BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
   PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);
   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);
   PUSH_DATA (push, 1);

   if (screen->tesla->oclass >= NVA0_3D_CLASS) {
      BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);
   PUSH_DATA (push, 0);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);
   PUSH_DATA (push, 0x3f);

   /* Per-stage shader code base addresses within the code BO; each stage
    * gets a (1 << NV50_CODE_BO_SIZE_LOG2)-byte slice. */
   BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (NV50_SHADER_STAGE_VERTEX << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (NV50_SHADER_STAGE_VERTEX << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (NV50_SHADER_STAGE_FRAGMENT << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (NV50_SHADER_STAGE_FRAGMENT << NV50_CODE_BO_SIZE_LOG2));

   BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->code->offset + (NV50_SHADER_STAGE_GEOMETRY << NV50_CODE_BO_SIZE_LOG2));
   PUSH_DATA (push, screen->code->offset + (NV50_SHADER_STAGE_GEOMETRY << NV50_CODE_BO_SIZE_LOG2));

   /* Thread-local storage base and size (log2 of per-thread bytes / 8). */
   BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->tls_bo->offset);
   PUSH_DATA (push, screen->tls_bo->offset);
   PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));

   BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->stack_bo->offset);
   PUSH_DATA (push, screen->stack_bo->offset);
   PUSH_DATA (push, 4);

   /* Define the driver-managed constant buffers (each a 64 KiB slice of
    * the uniforms BO): per-stage uniform CBs plus the AUX CB. */
   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (0 << 16));
   PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (1 << 16));
   PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (2 << 16));
   PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);

   BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->uniforms->offset + (4 << 16));
   PUSH_DATA (push, screen->uniforms->offset + (4 << 16));
   PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff));

   /* Bind the AUX CB to all three graphics stages. */
   BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);
   PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);

   /* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
   PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX);
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   PUSH_DATAf(push, 0.0f);
   BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
   PUSH_DATAh(push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);
   PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);

   /* set the membar offset */
   BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
   PUSH_DATA (push, (NV50_CB_AUX_MEMBAR_OFFSET << (8 - 2)) | NV50_CB_AUX);
   BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 1);
   PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_MEMBAR_OFFSET);

   nv50_upload_ms_info(push);

   /* max TIC (bits 4:8) & TSC bindings, per program type */
   for (i = 0; i < NV50_MAX_3D_SHADER_STAGES; ++i) {
      BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);
      PUSH_DATA (push, 0x54);
   }

   /* Texture image control (TIC) table, followed 64 KiB later by the
    * texture sampler control (TSC) table in the same BO. */
   BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset);
   PUSH_DATA (push, screen->txc->offset);
   PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);
   PUSH_DATAh(push, screen->txc->offset + 65536);
   PUSH_DATA (push, screen->txc->offset + 65536);
   PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);

   BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);
   PUSH_DATA (push, 0);

   /* Window/clip rectangles disabled and zeroed. */
   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);
   PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);
   BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
   for (i = 0; i < 8 * 2; ++i)
      PUSH_DATA(push, 0);
   BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);
   PUSH_DATA (push, 0);

   /* Default viewport transform and full-size viewports for all slots. */
   BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
   PUSH_DATA (push, 1);
   for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
      BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, 0.0f);
      PUSH_DATAf(push, 1.0f);
      BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(i)), 2);
      PUSH_DATA (push, 8192 << 16);
      PUSH_DATA (push, 8192 << 16);
   }

   BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
#ifdef NV50_SCISSORS_CLIPPING
   PUSH_DATA (push, 0x0000);
#else
   PUSH_DATA (push, 0x1080);
#endif

   BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);
   PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);

   /* We use scissors instead of exact view volume clipping,
    * so they're always enabled.
    */
   for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {
      BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(i)), 3);
      PUSH_DATA (push, 1);
      PUSH_DATA (push, 8192 << 16);
      PUSH_DATA (push, 8192 << 16);
   }

   BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);
   PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);
   BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);
   PUSH_DATA (push, 0x11111111);
   BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);
   PUSH_DATA (push, 1);

   BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);
   PUSH_DATA (push, 0);
   if (screen->base.class_3d >= NV84_3D_CLASS) {
      BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);
      PUSH_DATA (push, 0);
   }

   BEGIN_NV04(push, NV50_3D(UNK0FDC), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(UNK19C0), 1);
   PUSH_DATA (push, 1);
}
708 
nv50_tls_alloc(struct nv50_screen * screen,unsigned tls_space,uint64_t * tls_size)709 static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,
710       uint64_t *tls_size)
711 {
712    struct nouveau_device *dev = screen->base.device;
713    int ret;
714 
715    assert(tls_space % ONE_TEMP_SIZE == 0);
716    screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *
717          ONE_TEMP_SIZE;
718    if (nouveau_mesa_debug)
719       debug_printf("allocating space for %u temps\n",
720             util_next_power_of_two(tls_space / ONE_TEMP_SIZE));
721    *tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *
722          screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;
723 
724    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
725                         *tls_size, NULL, &screen->tls_bo);
726    if (ret) {
727       NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);
728       return ret;
729    }
730 
731    return 0;
732 }
733 
nv50_tls_realloc(struct nv50_screen * screen,unsigned tls_space)734 int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
735 {
736    struct nouveau_pushbuf *push = screen->base.pushbuf;
737    int ret;
738    uint64_t tls_size;
739 
740    if (tls_space < screen->cur_tls_space)
741       return 0;
742    if (tls_space > screen->max_tls_space) {
743       /* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /
744        * LOCAL_WARPS_NO_CLAMP) */
745       NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",
746             (unsigned)(tls_space / ONE_TEMP_SIZE),
747             (unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));
748       return -ENOMEM;
749    }
750 
751    nouveau_bo_ref(NULL, &screen->tls_bo);
752    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
753    if (ret)
754       return ret;
755 
756    BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);
757    PUSH_DATAh(push, screen->tls_bo->offset);
758    PUSH_DATA (push, screen->tls_bo->offset);
759    PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));
760 
761    return 1;
762 }
763 
764 static const void *
nv50_screen_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)765 nv50_screen_get_compiler_options(struct pipe_screen *pscreen,
766                                  enum pipe_shader_ir ir,
767                                  enum pipe_shader_type shader)
768 {
769    if (ir == PIPE_SHADER_IR_NIR)
770       return nv50_ir_nir_shader_compiler_options(NVISA_G80_CHIPSET, shader);
771    return NULL;
772 }
773 
774 struct nouveau_screen *
nv50_screen_create(struct nouveau_device * dev)775 nv50_screen_create(struct nouveau_device *dev)
776 {
777    struct nv50_screen *screen;
778    struct pipe_screen *pscreen;
779    struct nouveau_object *chan;
780    uint64_t value;
781    uint32_t tesla_class;
782    unsigned stack_size;
783    int ret;
784 
785    screen = CALLOC_STRUCT(nv50_screen);
786    if (!screen)
787       return NULL;
788    pscreen = &screen->base.base;
789    pscreen->destroy = nv50_screen_destroy;
790 
791    simple_mtx_init(&screen->state_lock, mtx_plain);
792    ret = nouveau_screen_init(&screen->base, dev);
793    if (ret) {
794       NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);
795       goto fail;
796    }
797 
798    /* TODO: Prevent FIFO prefetch before transfer of index buffers and
799     *  admit them to VRAM.
800     */
801    screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
802       PIPE_BIND_VERTEX_BUFFER;
803    screen->base.sysmem_bindings |=
804       PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
805 
806    screen->base.pushbuf->rsvd_kick = 5;
807 
808    chan = screen->base.channel;
809 
810    pscreen->context_create = nv50_create;
811    pscreen->is_format_supported = nv50_screen_is_format_supported;
812    pscreen->get_shader_param = nv50_screen_get_shader_param;
813    pscreen->get_compute_param = nv50_screen_get_compute_param;
814    pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;
815    pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;
816 
817    /* nir stuff */
818    pscreen->get_compiler_options = nv50_screen_get_compiler_options;
819 
820    nv50_screen_init_resource_functions(pscreen);
821 
822    if (screen->base.device->chipset < 0x84 ||
823        debug_get_bool_option("NOUVEAU_PMPEG", false)) {
824       /* PMPEG */
825       nouveau_screen_init_vdec(&screen->base);
826    } else if (screen->base.device->chipset < 0x98 ||
827               screen->base.device->chipset == 0xa0) {
828       /* VP2 */
829       screen->base.base.get_video_param = nv84_screen_get_video_param;
830       screen->base.base.is_video_format_supported = nv84_screen_video_supported;
831    } else {
832       /* VP3/4 */
833       screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
834       screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
835    }
836 
837    ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,
838                         NULL, &screen->fence.bo);
839    if (ret) {
840       NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);
841       goto fail;
842    }
843 
844    BO_MAP(&screen->base, screen->fence.bo, 0, NULL);
845    screen->fence.map = screen->fence.bo->map;
846    screen->base.fence.emit = nv50_screen_fence_emit;
847    screen->base.fence.update = nv50_screen_fence_update;
848 
849    ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,
850                             &(struct nv04_notify){ .length = 32 },
851                             sizeof(struct nv04_notify), &screen->sync);
852    if (ret) {
853       NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);
854       goto fail;
855    }
856 
857    ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,
858                             NULL, 0, &screen->m2mf);
859    if (ret) {
860       NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);
861       goto fail;
862    }
863 
864    ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,
865                             NULL, 0, &screen->eng2d);
866    if (ret) {
867       NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);
868       goto fail;
869    }
870 
871    switch (dev->chipset & 0xf0) {
872    case 0x50:
873       tesla_class = NV50_3D_CLASS;
874       break;
875    case 0x80:
876    case 0x90:
877       tesla_class = NV84_3D_CLASS;
878       break;
879    case 0xa0:
880       switch (dev->chipset) {
881       case 0xa0:
882       case 0xaa:
883       case 0xac:
884          tesla_class = NVA0_3D_CLASS;
885          break;
886       case 0xaf:
887          tesla_class = NVAF_3D_CLASS;
888          break;
889       default:
890          tesla_class = NVA3_3D_CLASS;
891          break;
892       }
893       break;
894    default:
895       NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);
896       goto fail;
897    }
898    screen->base.class_3d = tesla_class;
899 
900    nv50_init_screen_caps(screen);
901 
902    ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,
903                             NULL, 0, &screen->tesla);
904    if (ret) {
905       NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);
906       goto fail;
907    }
908 
909    /* This over-allocates by a page. The GP, which would execute at the end of
910     * the last page, would trigger faults. The going theory is that it
911     * prefetches up to a certain amount.
912     */
913    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,
914                         (3 << NV50_CODE_BO_SIZE_LOG2) + 0x1000,
915                         NULL, &screen->code);
916    if (ret) {
917       NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);
918       goto fail;
919    }
920 
921    nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
922    nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
923    nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);
924 
925    nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
926 
927    screen->TPs = util_bitcount(value & 0xffff);
928    screen->MPsInTP = util_bitcount(value & 0x0f000000);
929 
930    screen->mp_count = screen->TPs * screen->MPsInTP;
931 
932    stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *
933          STACK_WARPS_ALLOC * 64 * 8;
934 
935    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,
936                         &screen->stack_bo);
937    if (ret) {
938       NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);
939       goto fail;
940    }
941 
942    uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *
943          screen->MPsInTP * LOCAL_WARPS_ALLOC *  THREADS_IN_WARP *
944          ONE_TEMP_SIZE;
945    screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;
946    screen->max_tls_space /= 2; /* half of vram */
947 
948    /* hw can address max 64 KiB */
949    screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);
950 
951    uint64_t tls_size;
952    unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;
953    ret = nv50_tls_alloc(screen, tls_space, &tls_size);
954    if (ret)
955       goto fail;
956 
957    if (nouveau_mesa_debug)
958       debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",
959             screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);
960 
961    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 5 << 16, NULL,
962                         &screen->uniforms);
963    if (ret) {
964       NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);
965       goto fail;
966    }
967 
968    ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,
969                         &screen->txc);
970    if (ret) {
971       NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);
972       goto fail;
973    }
974 
975    screen->tic.entries = CALLOC(4096, sizeof(void *));
976    screen->tsc.entries = screen->tic.entries + 2048;
977 
978    if (!nv50_blitter_create(screen))
979       goto fail;
980 
981    nv50_screen_init_hwctx(screen);
982 
983    ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);
984    if (ret) {
985       NOUVEAU_ERR("Failed to init compute context: %d\n", ret);
986       goto fail;
987    }
988 
989    // submit all initial state
990    PUSH_KICK(screen->base.pushbuf);
991 
992    return &screen->base;
993 
994 fail:
995    screen->base.base.context_create = NULL;
996    return &screen->base;
997 }
998 
999 int
nv50_screen_tic_alloc(struct nv50_screen * screen,void * entry)1000 nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)
1001 {
1002    int i = screen->tic.next;
1003 
1004    while (screen->tic.lock[i / 32] & (1 << (i % 32)))
1005       i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
1006 
1007    screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);
1008 
1009    if (screen->tic.entries[i])
1010       nv50_tic_entry(screen->tic.entries[i])->id = -1;
1011 
1012    screen->tic.entries[i] = entry;
1013    return i;
1014 }
1015 
1016 int
nv50_screen_tsc_alloc(struct nv50_screen * screen,void * entry)1017 nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)
1018 {
1019    int i = screen->tsc.next;
1020 
1021    while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
1022       i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
1023 
1024    screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);
1025 
1026    if (screen->tsc.entries[i])
1027       nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
1028 
1029    screen->tsc.entries[i] = entry;
1030    return i;
1031 }
1032