• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2008 Ben Skeggs
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_resource.h"
25 #include "nvc0/gm107_texture.xml.h"
26 #include "nvc0/nvc0_compute.xml.h"
27 #include "nv50/g80_texture.xml.h"
28 #include "nv50/g80_defs.xml.h"
29 
30 #include "util/u_format.h"
31 
/* A 32-bit entry of nvc0->tex_handles[][] combines a TIC id (OR-ed into the
 * low bits) and a TSC id (OR-ed in shifted left by 20).  Setting every bit
 * of the respective field marks that half of the handle as invalid.
 */
#define NVE4_TIC_ENTRY_INVALID 0x000fffff
#define NVE4_TSC_ENTRY_INVALID 0xfff00000
34 
35 static inline uint32_t
nv50_tic_swizzle(const struct nvc0_format * fmt,unsigned swz,bool tex_int)36 nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
37 {
38    switch (swz) {
39    case PIPE_SWIZZLE_X  : return fmt->tic.src_x;
40    case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
41    case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
42    case PIPE_SWIZZLE_W: return fmt->tic.src_w;
43    case PIPE_SWIZZLE_1:
44       return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
45    case PIPE_SWIZZLE_0:
46    default:
47       return G80_TIC_SOURCE_ZERO;
48    }
49 }
50 
51 struct pipe_sampler_view *
nvc0_create_sampler_view(struct pipe_context * pipe,struct pipe_resource * res,const struct pipe_sampler_view * templ)52 nvc0_create_sampler_view(struct pipe_context *pipe,
53                          struct pipe_resource *res,
54                          const struct pipe_sampler_view *templ)
55 {
56    uint32_t flags = 0;
57 
58    if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
59       flags |= NV50_TEXVIEW_SCALED_COORDS;
60 
61    return nvc0_create_texture_view(pipe, res, templ, flags, templ->target);
62 }
63 
64 static struct pipe_sampler_view *
gm107_create_texture_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ,uint32_t flags,enum pipe_texture_target target)65 gm107_create_texture_view(struct pipe_context *pipe,
66                           struct pipe_resource *texture,
67                           const struct pipe_sampler_view *templ,
68                           uint32_t flags,
69                           enum pipe_texture_target target)
70 {
71    const struct util_format_description *desc;
72    const struct nvc0_format *fmt;
73    uint64_t address;
74    uint32_t *tic;
75    uint32_t swz[4];
76    uint32_t width, height;
77    uint32_t depth;
78    struct nv50_tic_entry *view;
79    struct nv50_miptree *mt;
80    bool tex_int;
81 
82    view = MALLOC_STRUCT(nv50_tic_entry);
83    if (!view)
84       return NULL;
85    mt = nv50_miptree(texture);
86 
87    view->pipe = *templ;
88    view->pipe.reference.count = 1;
89    view->pipe.texture = NULL;
90    view->pipe.context = pipe;
91 
92    view->id = -1;
93 
94    pipe_resource_reference(&view->pipe.texture, texture);
95 
96    tic = &view->tic[0];
97 
98    desc = util_format_description(view->pipe.format);
99    tex_int = util_format_is_pure_integer(view->pipe.format);
100 
101    fmt = &nvc0_format_table[view->pipe.format];
102    swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
103    swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
104    swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
105    swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
106 
107    tic[0]  = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
108    tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
109    tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
110    tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
111    tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
112    tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
113    tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
114    tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
115    tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;
116 
117    address = mt->base.address;
118 
119    tic[3]  = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
120    tic[4]  = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
121    tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
122 
123    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
124       tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
125 
126    if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
127       tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
128    else
129       tic[5] = 0;
130 
131    /* check for linear storage type */
132    if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
133       if (texture->target == PIPE_BUFFER) {
134          assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
135          width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;
136          address +=
137             view->pipe.u.buf.offset;
138          tic[2]  = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
139          tic[3] |= width >> 16;
140          tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
141          tic[4] |= width & 0xffff;
142       } else {
143          assert(!(mt->level[0].pitch & 0x1f));
144          /* must be 2D texture without mip maps */
145          tic[2]  = GM107_TIC2_2_HEADER_VERSION_PITCH;
146          tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
147          tic[3] |= mt->level[0].pitch >> 5;
148          tic[4] |= mt->base.base.width0 - 1;
149          tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
150          tic[5] |= mt->base.base.height0 - 1;
151       }
152       tic[1]  = address;
153       tic[2] |= address >> 32;
154       tic[6]  = 0;
155       tic[7]  = 0;
156       return &view->pipe;
157    }
158 
159    tic[2]  = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
160    tic[3] |=
161       ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
162       ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);
163 
164    depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
165 
166    if (mt->base.base.array_size > 1) {
167       /* there doesn't seem to be a base layer field in TIC */
168       address += view->pipe.u.tex.first_layer * mt->layer_stride;
169       depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
170    }
171    tic[1]  = address;
172    tic[2] |= address >> 32;
173 
174    switch (target) {
175    case PIPE_TEXTURE_1D:
176       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
177       break;
178    case PIPE_TEXTURE_2D:
179       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
180       break;
181    case PIPE_TEXTURE_RECT:
182       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
183       break;
184    case PIPE_TEXTURE_3D:
185       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
186       break;
187    case PIPE_TEXTURE_CUBE:
188       depth /= 6;
189       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
190       break;
191    case PIPE_TEXTURE_1D_ARRAY:
192       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
193       break;
194    case PIPE_TEXTURE_2D_ARRAY:
195       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
196       break;
197    case PIPE_TEXTURE_CUBE_ARRAY:
198       depth /= 6;
199       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
200       break;
201    default:
202       unreachable("unexpected/invalid texture target");
203    }
204 
205    tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
206              GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
207              GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
208              GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
209 
210    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
211       width = mt->base.base.width0 << mt->ms_x;
212       height = mt->base.base.height0 << mt->ms_y;
213    } else {
214       width = mt->base.base.width0;
215       height = mt->base.base.height0;
216    }
217 
218    tic[4] |= width - 1;
219 
220    tic[5] |= (height - 1) & 0xffff;
221    tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
222    tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
223 
224    /* sampling points: (?) */
225    if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
226       tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
227       tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
228    } else {
229       tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
230       tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
231    }
232 
233    tic[7]  = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
234    tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;
235 
236    return &view->pipe;
237 }
238 
239 struct pipe_sampler_view *
gm107_create_texture_view_from_image(struct pipe_context * pipe,const struct pipe_image_view * view)240 gm107_create_texture_view_from_image(struct pipe_context *pipe,
241                                      const struct pipe_image_view *view)
242 {
243    struct nv04_resource *res = nv04_resource(view->resource);
244    struct pipe_sampler_view templ = {};
245    enum pipe_texture_target target;
246    uint32_t flags = 0;
247 
248    if (!res)
249       return NULL;
250    target = res->base.target;
251 
252    if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
253       target = PIPE_TEXTURE_2D_ARRAY;
254 
255    templ.format = view->format;
256    templ.swizzle_r = PIPE_SWIZZLE_X;
257    templ.swizzle_g = PIPE_SWIZZLE_Y;
258    templ.swizzle_b = PIPE_SWIZZLE_Z;
259    templ.swizzle_a = PIPE_SWIZZLE_W;
260 
261    if (target == PIPE_BUFFER) {
262       templ.u.buf.offset = view->u.buf.offset;
263       templ.u.buf.size = view->u.buf.size;
264    } else {
265       templ.u.tex.first_layer = view->u.tex.first_layer;
266       templ.u.tex.last_layer = view->u.tex.last_layer;
267       templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
268    }
269 
270    flags = NV50_TEXVIEW_SCALED_COORDS;
271 
272    return nvc0_create_texture_view(pipe, &res->base, &templ, flags, target);
273 }
274 
275 static struct pipe_sampler_view *
gf100_create_texture_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ,uint32_t flags,enum pipe_texture_target target)276 gf100_create_texture_view(struct pipe_context *pipe,
277                           struct pipe_resource *texture,
278                           const struct pipe_sampler_view *templ,
279                           uint32_t flags,
280                           enum pipe_texture_target target)
281 {
282    const struct util_format_description *desc;
283    const struct nvc0_format *fmt;
284    uint64_t address;
285    uint32_t *tic;
286    uint32_t swz[4];
287    uint32_t width, height;
288    uint32_t depth;
289    uint32_t tex_fmt;
290    struct nv50_tic_entry *view;
291    struct nv50_miptree *mt;
292    bool tex_int;
293 
294    view = MALLOC_STRUCT(nv50_tic_entry);
295    if (!view)
296       return NULL;
297    mt = nv50_miptree(texture);
298 
299    view->pipe = *templ;
300    view->pipe.reference.count = 1;
301    view->pipe.texture = NULL;
302    view->pipe.context = pipe;
303 
304    view->id = -1;
305 
306    pipe_resource_reference(&view->pipe.texture, texture);
307 
308    tic = &view->tic[0];
309 
310    desc = util_format_description(view->pipe.format);
311 
312    fmt = &nvc0_format_table[view->pipe.format];
313 
314    tex_int = util_format_is_pure_integer(view->pipe.format);
315    tex_fmt = fmt->tic.format & 0x3f;
316 
317    swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
318    swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
319    swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
320    swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
321    tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
322             (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
323             (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
324             (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
325             (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
326             (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
327             (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
328             (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
329             (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
330             ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));
331 
332    address = mt->base.address;
333 
334    tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
335 
336    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
337       tic[2] |= G80_TIC_2_SRGB_CONVERSION;
338 
339    if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
340       tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
341 
342    /* check for linear storage type */
343    if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
344       if (texture->target == PIPE_BUFFER) {
345          assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
346          address +=
347             view->pipe.u.buf.offset;
348          tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
349          tic[3] = 0;
350          tic[4] = /* width */
351             view->pipe.u.buf.size / (desc->block.bits / 8);
352          tic[5] = 0;
353       } else {
354          /* must be 2D texture without mip maps */
355          tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
356          tic[3] = mt->level[0].pitch;
357          tic[4] = mt->base.base.width0;
358          tic[5] = (1 << 16) | mt->base.base.height0;
359       }
360       tic[6] =
361       tic[7] = 0;
362       tic[1] = address;
363       tic[2] |= address >> 32;
364       return &view->pipe;
365    }
366 
367    tic[2] |=
368       ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
369       ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
370 
371    depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
372 
373    if (mt->base.base.array_size > 1) {
374       /* there doesn't seem to be a base layer field in TIC */
375       address += view->pipe.u.tex.first_layer * mt->layer_stride;
376       depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
377    }
378    tic[1] = address;
379    tic[2] |= address >> 32;
380 
381    switch (target) {
382    case PIPE_TEXTURE_1D:
383       tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
384       break;
385    case PIPE_TEXTURE_2D:
386       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
387       break;
388    case PIPE_TEXTURE_RECT:
389       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
390       break;
391    case PIPE_TEXTURE_3D:
392       tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
393       break;
394    case PIPE_TEXTURE_CUBE:
395       depth /= 6;
396       tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
397       break;
398    case PIPE_TEXTURE_1D_ARRAY:
399       tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
400       break;
401    case PIPE_TEXTURE_2D_ARRAY:
402       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
403       break;
404    case PIPE_TEXTURE_CUBE_ARRAY:
405       depth /= 6;
406       tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
407       break;
408    default:
409       unreachable("unexpected/invalid texture target");
410    }
411 
412    tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
413 
414    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
415       width = mt->base.base.width0 << mt->ms_x;
416       height = mt->base.base.height0 << mt->ms_y;
417    } else {
418       width = mt->base.base.width0;
419       height = mt->base.base.height0;
420    }
421 
422    tic[4] = (1 << 31) | width;
423 
424    tic[5] = height & 0xffff;
425    tic[5] |= depth << 16;
426    tic[5] |= mt->base.base.last_level << 28;
427 
428    /* sampling points: (?) */
429    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
430       tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
431    else
432       tic[6] = 0x03000000;
433 
434    tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
435    tic[7] |= mt->ms_mode << 12;
436 
437    return &view->pipe;
438 }
439 
440 struct pipe_sampler_view *
nvc0_create_texture_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ,uint32_t flags,enum pipe_texture_target target)441 nvc0_create_texture_view(struct pipe_context *pipe,
442                          struct pipe_resource *texture,
443                          const struct pipe_sampler_view *templ,
444                          uint32_t flags,
445                          enum pipe_texture_target target)
446 {
447    if (nvc0_context(pipe)->screen->tic.maxwell)
448       return gm107_create_texture_view(pipe, texture, templ, flags, target);
449    return gf100_create_texture_view(pipe, texture, templ, flags, target);
450 }
451 
452 void
nvc0_update_tic(struct nvc0_context * nvc0,struct nv50_tic_entry * tic,struct nv04_resource * res)453 nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
454                 struct nv04_resource *res)
455 {
456    uint64_t address = res->address;
457    if (res->base.target != PIPE_BUFFER)
458       return;
459    address += tic->pipe.u.buf.offset;
460    if (tic->tic[1] == (uint32_t)address &&
461        (tic->tic[2] & 0xff) == address >> 32)
462       return;
463 
464    nvc0_screen_tic_unlock(nvc0->screen, tic);
465    tic->id = -1;
466    tic->tic[1] = address;
467    tic->tic[2] &= 0xffffff00;
468    tic->tic[2] |= address >> 32;
469 }
470 
471 bool
nvc0_validate_tic(struct nvc0_context * nvc0,int s)472 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
473 {
474    uint32_t commands[32];
475    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
476    unsigned i;
477    unsigned n = 0;
478    bool need_flush = false;
479 
480    for (i = 0; i < nvc0->num_textures[s]; ++i) {
481       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
482       struct nv04_resource *res;
483       const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
484 
485       if (!tic) {
486          if (dirty)
487             commands[n++] = (i << 1) | 0;
488          continue;
489       }
490       res = nv04_resource(tic->pipe.texture);
491       nvc0_update_tic(nvc0, tic, res);
492 
493       if (tic->id < 0) {
494          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
495 
496          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
497                                NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
498                                tic->tic);
499          need_flush = true;
500       } else
501       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
502          if (unlikely(s == 5))
503             BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
504          else
505             BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
506          PUSH_DATA (push, (tic->id << 4) | 1);
507          NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
508       }
509       nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
510 
511       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
512       res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
513 
514       if (!dirty)
515          continue;
516       commands[n++] = (tic->id << 9) | (i << 1) | 1;
517 
518       if (unlikely(s == 5))
519          BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
520       else
521          BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
522    }
523    for (; i < nvc0->state.num_textures[s]; ++i)
524       commands[n++] = (i << 1) | 0;
525 
526    nvc0->state.num_textures[s] = nvc0->num_textures[s];
527 
528    if (n) {
529       if (unlikely(s == 5))
530          BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
531       else
532          BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
533       PUSH_DATAp(push, commands, n);
534    }
535    nvc0->textures_dirty[s] = 0;
536 
537    return need_flush;
538 }
539 
540 static bool
nve4_validate_tic(struct nvc0_context * nvc0,unsigned s)541 nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
542 {
543    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
544    unsigned i;
545    bool need_flush = false;
546 
547    for (i = 0; i < nvc0->num_textures[s]; ++i) {
548       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
549       struct nv04_resource *res;
550       const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
551 
552       if (!tic) {
553          nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
554          continue;
555       }
556       res = nv04_resource(tic->pipe.texture);
557       nvc0_update_tic(nvc0, tic, res);
558 
559       if (tic->id < 0) {
560          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
561 
562          nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
563                                NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
564                                tic->tic);
565          need_flush = true;
566       } else
567       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
568          BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
569          PUSH_DATA (push, (tic->id << 4) | 1);
570       }
571       nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
572 
573       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
574       res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
575 
576       nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
577       nvc0->tex_handles[s][i] |= tic->id;
578       if (dirty)
579          BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
580    }
581    for (; i < nvc0->state.num_textures[s]; ++i) {
582       nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
583       nvc0->textures_dirty[s] |= 1 << i;
584    }
585 
586    nvc0->state.num_textures[s] = nvc0->num_textures[s];
587 
588    return need_flush;
589 }
590 
nvc0_validate_textures(struct nvc0_context * nvc0)591 void nvc0_validate_textures(struct nvc0_context *nvc0)
592 {
593    bool need_flush = false;
594    int i;
595 
596    for (i = 0; i < 5; i++) {
597       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
598          need_flush |= nve4_validate_tic(nvc0, i);
599       else
600          need_flush |= nvc0_validate_tic(nvc0, i);
601    }
602 
603    if (need_flush) {
604       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
605       PUSH_DATA (nvc0->base.pushbuf, 0);
606    }
607 
608    /* Invalidate all CP textures because they are aliased. */
609    for (int i = 0; i < nvc0->num_textures[5]; i++)
610       nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
611    nvc0->textures_dirty[5] = ~0;
612    nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
613 }
614 
615 bool
nvc0_validate_tsc(struct nvc0_context * nvc0,int s)616 nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
617 {
618    uint32_t commands[16];
619    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
620    unsigned i;
621    unsigned n = 0;
622    bool need_flush = false;
623 
624    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
625       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
626 
627       if (!(nvc0->samplers_dirty[s] & (1 << i)))
628          continue;
629       if (!tsc) {
630          commands[n++] = (i << 4) | 0;
631          continue;
632       }
633       nvc0->seamless_cube_map = tsc->seamless_cube_map;
634       if (tsc->id < 0) {
635          tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
636 
637          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
638                                65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
639                                32, tsc->tsc);
640          need_flush = true;
641       }
642       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
643 
644       commands[n++] = (tsc->id << 12) | (i << 4) | 1;
645    }
646    for (; i < nvc0->state.num_samplers[s]; ++i)
647       commands[n++] = (i << 4) | 0;
648 
649    nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
650 
651    if (n) {
652       if (unlikely(s == 5))
653          BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
654       else
655          BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
656       PUSH_DATAp(push, commands, n);
657    }
658    nvc0->samplers_dirty[s] = 0;
659 
660    return need_flush;
661 }
662 
663 bool
nve4_validate_tsc(struct nvc0_context * nvc0,int s)664 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
665 {
666    unsigned i;
667    bool need_flush = false;
668 
669    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
670       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
671 
672       if (!tsc) {
673          nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
674          continue;
675       }
676       if (tsc->id < 0) {
677          tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
678 
679          nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
680                                65536 + tsc->id * 32,
681                                NV_VRAM_DOMAIN(&nvc0->screen->base),
682                                32, tsc->tsc);
683          need_flush = true;
684       }
685       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
686 
687       nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
688       nvc0->tex_handles[s][i] |= tsc->id << 20;
689    }
690    for (; i < nvc0->state.num_samplers[s]; ++i) {
691       nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
692       nvc0->samplers_dirty[s] |= 1 << i;
693    }
694 
695    nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
696 
697    return need_flush;
698 }
699 
nvc0_validate_samplers(struct nvc0_context * nvc0)700 void nvc0_validate_samplers(struct nvc0_context *nvc0)
701 {
702    bool need_flush = false;
703    int i;
704 
705    for (i = 0; i < 5; i++) {
706       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
707          need_flush |= nve4_validate_tsc(nvc0, i);
708       else
709          need_flush |= nvc0_validate_tsc(nvc0, i);
710    }
711 
712    if (need_flush) {
713       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
714       PUSH_DATA (nvc0->base.pushbuf, 0);
715    }
716 
717    /* Invalidate all CP samplers because they are aliased. */
718    nvc0->samplers_dirty[5] = ~0;
719    nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
720 }
721 
722 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
723  * At some point we might want to get a list of the combinations used by a
724  * shader and fill in those entries instead of having it extract the handles.
725  */
726 void
nve4_set_tex_handles(struct nvc0_context * nvc0)727 nve4_set_tex_handles(struct nvc0_context *nvc0)
728 {
729    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
730    struct nvc0_screen *screen = nvc0->screen;
731    unsigned s;
732 
733    if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
734       return;
735 
736    for (s = 0; s < 5; ++s) {
737       uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
738       if (!dirty)
739          continue;
740       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
741       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
742       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
743       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
744       do {
745          int i = ffs(dirty) - 1;
746          dirty &= ~(1 << i);
747 
748          BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
749          PUSH_DATA (push, (8 + i) * 4);
750          PUSH_DATA (push, nvc0->tex_handles[s][i]);
751       } while (dirty);
752 
753       nvc0->textures_dirty[s] = 0;
754       nvc0->samplers_dirty[s] = 0;
755    }
756 }
757 
758 
759 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
760 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
761 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
762 
763 static void
nvc0_get_surface_dims(struct pipe_image_view * view,int * width,int * height,int * depth)764 nvc0_get_surface_dims(struct pipe_image_view *view, int *width, int *height,
765                       int *depth)
766 {
767    struct nv04_resource *res = nv04_resource(view->resource);
768    int level;
769 
770    *width = *height = *depth = 1;
771    if (res->base.target == PIPE_BUFFER) {
772       *width = view->u.buf.size / util_format_get_blocksize(view->format);
773       return;
774    }
775 
776    level = view->u.tex.level;
777    *width = u_minify(view->resource->width0, level);
778    *height = u_minify(view->resource->height0, level);
779    *depth = u_minify(view->resource->depth0, level);
780 
781    switch (res->base.target) {
782    case PIPE_TEXTURE_1D_ARRAY:
783    case PIPE_TEXTURE_2D_ARRAY:
784    case PIPE_TEXTURE_CUBE:
785    case PIPE_TEXTURE_CUBE_ARRAY:
786       *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
787       break;
788    case PIPE_TEXTURE_1D:
789    case PIPE_TEXTURE_2D:
790    case PIPE_TEXTURE_RECT:
791    case PIPE_TEXTURE_3D:
792       break;
793    default:
794       assert(!"unexpected texture target");
795       break;
796    }
797 }
798 
799 void
nvc0_mark_image_range_valid(const struct pipe_image_view * view)800 nvc0_mark_image_range_valid(const struct pipe_image_view *view)
801 {
802    struct nv04_resource *res = (struct nv04_resource *)view->resource;
803 
804    assert(view->resource->target == PIPE_BUFFER);
805 
806    util_range_add(&res->valid_buffer_range,
807                   view->u.buf.offset,
808                   view->u.buf.offset + view->u.buf.size);
809 }
810 
811 void
nve4_set_surface_info(struct nouveau_pushbuf * push,struct pipe_image_view * view,struct nvc0_context * nvc0)812 nve4_set_surface_info(struct nouveau_pushbuf *push,
813                       struct pipe_image_view *view,
814                       struct nvc0_context *nvc0)
815 {
816    struct nvc0_screen *screen = nvc0->screen;
817    struct nv04_resource *res;
818    uint64_t address;
819    uint32_t *const info = push->cur;
820    int width, height, depth;
821    uint8_t log2cpp;
822 
823    if (view && !nve4_su_format_map[view->format])
824       NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
825 
826    push->cur += 16;
827 
828    if (!view || !nve4_su_format_map[view->format]) {
829       memset(info, 0, 16 * sizeof(*info));
830 
831       info[0] = 0xbadf0000;
832       info[1] = 0x80004000;
833       info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
834          screen->lib_code->start;
835       return;
836    }
837    res = nv04_resource(view->resource);
838 
839    address = res->address;
840 
841    /* get surface dimensions based on the target. */
842    nvc0_get_surface_dims(view, &width, &height, &depth);
843 
844    info[8] = width;
845    info[9] = height;
846    info[10] = depth;
847    switch (res->base.target) {
848    case PIPE_TEXTURE_1D_ARRAY:
849       info[11] = 1;
850       break;
851    case PIPE_TEXTURE_2D:
852    case PIPE_TEXTURE_RECT:
853       info[11] = 2;
854       break;
855    case PIPE_TEXTURE_3D:
856       info[11] = 3;
857       break;
858    case PIPE_TEXTURE_2D_ARRAY:
859    case PIPE_TEXTURE_CUBE:
860    case PIPE_TEXTURE_CUBE_ARRAY:
861       info[11] = 4;
862       break;
863    default:
864       info[11] = 0;
865       break;
866    }
867    log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;
868 
869    /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
870     * format doesn't mismatch. */
871    info[12] = util_format_get_blocksize(view->format);
872 
873    /* limit in bytes for raw access */
874    info[13] = (0x06 << 22) | ((width << log2cpp) - 1);
875 
876    info[1] = nve4_su_format_map[view->format];
877 
878 #if 0
879    switch (util_format_get_blocksizebits(view->format)) {
880    case  16: info[1] |= 1 << 16; break;
881    case  32: info[1] |= 2 << 16; break;
882    case  64: info[1] |= 3 << 16; break;
883    case 128: info[1] |= 4 << 16; break;
884    default:
885       break;
886    }
887 #else
888    info[1] |= log2cpp << 16;
889    info[1] |=  0x4000;
890    info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
891 #endif
892 
893    if (res->base.target == PIPE_BUFFER) {
894       address += view->u.buf.offset;
895 
896       info[0]  = address >> 8;
897       info[2]  = width - 1;
898       info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
899       info[3]  = 0;
900       info[4]  = 0;
901       info[5]  = 0;
902       info[6]  = 0;
903       info[7]  = 0;
904       info[14] = 0;
905       info[15] = 0;
906    } else {
907       struct nv50_miptree *mt = nv50_miptree(&res->base);
908       struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
909       const unsigned z = view->u.tex.first_layer;
910 
911       if (z) {
912          if (mt->layout_3d) {
913             address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
914             /* doesn't work if z passes z-tile boundary */
915             if (depth > 1) {
916                pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
917                                   "3D images are not really supported!");
918                debug_printf("3D images are not really supported!\n");
919             }
920          } else {
921             address += mt->layer_stride * z;
922          }
923       }
924       address += lvl->offset;
925 
926       info[0]  = address >> 8;
927       info[2]  = (width << mt->ms_x) - 1;
928       /* NOTE: this is really important: */
929       info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
930       info[3]  = (0x88 << 24) | (lvl->pitch / 64);
931       info[4]  = (height << mt->ms_y) - 1;
932       info[4] |= (lvl->tile_mode & 0x0f0) << 25;
933       info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
934       info[5]  = mt->layer_stride >> 8;
935       info[6]  = depth - 1;
936       info[6] |= (lvl->tile_mode & 0xf00) << 21;
937       info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
938       info[7]  = 0;
939       info[14] = mt->ms_x;
940       info[15] = mt->ms_y;
941    }
942 }
943 
944 static inline void
nvc0_set_surface_info(struct nouveau_pushbuf * push,struct pipe_image_view * view,uint64_t address,int width,int height,int depth)945 nvc0_set_surface_info(struct nouveau_pushbuf *push,
946                       struct pipe_image_view *view, uint64_t address,
947                       int width, int height, int depth)
948 {
949    struct nv04_resource *res;
950    uint32_t *const info = push->cur;
951 
952    push->cur += 16;
953 
954    /* Make sure to always initialize the surface information area because it's
955     * used to check if the given image is bound or not. */
956    memset(info, 0, 16 * sizeof(*info));
957 
958    if (!view || !view->resource)
959       return;
960    res = nv04_resource(view->resource);
961 
962    /* Stick the image dimensions for the imageSize() builtin. */
963    info[8] = width;
964    info[9] = height;
965    info[10] = depth;
966 
967    /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
968     * offset and to check if the format doesn't mismatch. */
969    info[12] = util_format_get_blocksize(view->format);
970 
971    if (res->base.target == PIPE_BUFFER) {
972       info[0]  = address >> 8;
973       info[2]  = width;
974    } else {
975       struct nv50_miptree *mt = nv50_miptree(&res->base);
976 
977       info[0]  = address >> 8;
978       info[2]  = width;
979       info[4]  = height;
980       info[5]  = mt->layer_stride >> 8;
981       info[6]  = depth;
982       info[14] = mt->ms_x;
983       info[15] = mt->ms_y;
984    }
985 }
986 
987 void
nvc0_validate_suf(struct nvc0_context * nvc0,int s)988 nvc0_validate_suf(struct nvc0_context *nvc0, int s)
989 {
990    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
991    struct nvc0_screen *screen = nvc0->screen;
992 
993    for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
994       struct pipe_image_view *view = &nvc0->images[s][i];
995       int width, height, depth;
996       uint64_t address = 0;
997 
998       if (s == 5)
999          BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
1000       else
1001          BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
1002 
1003       if (view->resource) {
1004          struct nv04_resource *res = nv04_resource(view->resource);
1005          unsigned rt = nvc0_format_table[view->format].rt;
1006 
1007          if (util_format_is_depth_or_stencil(view->format))
1008             rt = rt << 12;
1009          else
1010             rt = (rt << 4) | (0x14 << 12);
1011 
1012          /* get surface dimensions based on the target. */
1013          nvc0_get_surface_dims(view, &width, &height, &depth);
1014 
1015          address = res->address;
1016          if (res->base.target == PIPE_BUFFER) {
1017             unsigned blocksize = util_format_get_blocksize(view->format);
1018 
1019             address += view->u.buf.offset;
1020             assert(!(address & 0xff));
1021 
1022             if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1023                nvc0_mark_image_range_valid(view);
1024 
1025             PUSH_DATAh(push, address);
1026             PUSH_DATA (push, address);
1027             PUSH_DATA (push, align(width * blocksize, 0x100));
1028             PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
1029             PUSH_DATA (push, rt);
1030             PUSH_DATA (push, 0);
1031          } else {
1032             struct nv50_miptree *mt = nv50_miptree(view->resource);
1033             struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
1034             const unsigned z = view->u.tex.first_layer;
1035 
1036             if (mt->layout_3d) {
1037                address += nvc0_mt_zslice_offset(mt, view->u.tex.level, z);
1038                if (depth >= 1) {
1039                   pipe_debug_message(&nvc0->base.debug, CONFORMANCE,
1040                                      "3D images are not supported!");
1041                   debug_printf("3D images are not supported!\n");
1042                }
1043             } else {
1044                address += mt->layer_stride * z;
1045             }
1046             address += lvl->offset;
1047 
1048             PUSH_DATAh(push, address);
1049             PUSH_DATA (push, address);
1050             PUSH_DATA (push, width << mt->ms_x);
1051             PUSH_DATA (push, height << mt->ms_y);
1052             PUSH_DATA (push, rt);
1053             PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
1054          }
1055 
1056          if (s == 5)
1057             BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
1058          else
1059             BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1060       } else {
1061          PUSH_DATA(push, 0);
1062          PUSH_DATA(push, 0);
1063          PUSH_DATA(push, 0);
1064          PUSH_DATA(push, 0);
1065          PUSH_DATA(push, 0x14000);
1066          PUSH_DATA(push, 0);
1067       }
1068 
1069       /* stick surface information into the driver constant buffer */
1070       if (s == 5)
1071          BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
1072       else
1073          BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1074       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1075       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1076       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1077       if (s == 5)
1078          BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
1079       else
1080          BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1081       PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1082 
1083       nvc0_set_surface_info(push, view, address, width, height, depth);
1084    }
1085 }
1086 
1087 static inline void
nvc0_update_surface_bindings(struct nvc0_context * nvc0)1088 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
1089 {
1090    nvc0_validate_suf(nvc0, 4);
1091 
1092    /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
1093    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
1094    nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
1095    nvc0->images_dirty[5] |= nvc0->images_valid[5];
1096 }
1097 
1098 static void
gm107_validate_surfaces(struct nvc0_context * nvc0,struct pipe_image_view * view,int stage,int slot)1099 gm107_validate_surfaces(struct nvc0_context *nvc0,
1100                         struct pipe_image_view *view, int stage, int slot)
1101 {
1102    struct nv04_resource *res = nv04_resource(view->resource);
1103    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1104    struct nvc0_screen *screen = nvc0->screen;
1105    struct nv50_tic_entry *tic;
1106 
1107    tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);
1108 
1109    res = nv04_resource(tic->pipe.texture);
1110    nvc0_update_tic(nvc0, tic, res);
1111 
1112    if (tic->id < 0) {
1113       tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
1114 
1115       /* upload the texture view */
1116       nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
1117                             NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);
1118 
1119       BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
1120       PUSH_DATA (push, 0);
1121    } else
1122    if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
1123       BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
1124       PUSH_DATA (push, (tic->id << 4) | 1);
1125    }
1126    nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
1127 
1128    res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
1129    res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
1130 
1131    BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);
1132 
1133    /* upload the texture handle */
1134    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1135    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1136    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
1137    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
1138    BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
1139    PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
1140    PUSH_DATA (push, tic->id);
1141 }
1142 
1143 static inline void
nve4_update_surface_bindings(struct nvc0_context * nvc0)1144 nve4_update_surface_bindings(struct nvc0_context *nvc0)
1145 {
1146    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1147    struct nvc0_screen *screen = nvc0->screen;
1148    int i, j, s;
1149 
1150    for (s = 0; s < 5; s++) {
1151       if (!nvc0->images_dirty[s])
1152          continue;
1153 
1154       for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
1155          struct pipe_image_view *view = &nvc0->images[s][i];
1156 
1157          BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1158          PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1159          PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1160          PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1161          BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1162          PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1163 
1164          if (view->resource) {
1165             struct nv04_resource *res = nv04_resource(view->resource);
1166 
1167             if (res->base.target == PIPE_BUFFER) {
1168                if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1169                   nvc0_mark_image_range_valid(view);
1170             }
1171 
1172             nve4_set_surface_info(push, view, nvc0);
1173             BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1174 
1175             if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
1176                gm107_validate_surfaces(nvc0, view, s, i);
1177          } else {
1178             for (j = 0; j < 16; j++)
1179                PUSH_DATA(push, 0);
1180          }
1181       }
1182    }
1183 }
1184 
1185 void
nvc0_validate_surfaces(struct nvc0_context * nvc0)1186 nvc0_validate_surfaces(struct nvc0_context *nvc0)
1187 {
1188    if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
1189       nve4_update_surface_bindings(nvc0);
1190    } else {
1191       nvc0_update_surface_bindings(nvc0);
1192    }
1193 }
1194 
1195 
1196 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
1197 {
1198    [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
1199    [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
1200    [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
1201    [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
1202    [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
1203    [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
1204    [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
1205    [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
1206    [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
1207    [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
1208    [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
1209    [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
1210    [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
1211    [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
1212    [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
1213    [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
1214    [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
1215    [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
1216    [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
1217    [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
1218    [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
1219    [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
1220    [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
1221    [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
1222    [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
1223    [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
1224    [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
1225    [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
1226    [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
1227    [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
1228    [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
1229    [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
1230    [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
1231    [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
1232    [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
1233    [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
1234    [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
1235    [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
1236    [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
1237    [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
1238 };
1239 
1240 /* Auxiliary format description values for surface instructions.
1241  * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
1242  */
1243 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
1244 {
1245    [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
1246    [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
1247    [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
1248 
1249    [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
1250    [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
1251    [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
1252    [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
1253    [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
1254 
1255    [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
1256    [PIPE_FORMAT_R32G32_SINT] = 0x3433,
1257    [PIPE_FORMAT_R32G32_UINT] = 0x3433,
1258 
1259    [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
1260    [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
1261    [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
1262    [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
1263    [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
1264    [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
1265    [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
1266    [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
1267 
1268    [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
1269    [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
1270    [PIPE_FORMAT_R16G16_SINT] = 0x2524,
1271    [PIPE_FORMAT_R16G16_UINT] = 0x2524,
1272    [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
1273 
1274    [PIPE_FORMAT_R32_SINT] = 0x2024,
1275    [PIPE_FORMAT_R32_UINT] = 0x2024,
1276    [PIPE_FORMAT_R32_FLOAT] = 0x2024,
1277 
1278    [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
1279    [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
1280    [PIPE_FORMAT_R8G8_SINT] = 0x1615,
1281    [PIPE_FORMAT_R8G8_UINT] = 0x1615,
1282 
1283    [PIPE_FORMAT_R16_UNORM] = 0x1115,
1284    [PIPE_FORMAT_R16_SNORM] = 0x1115,
1285    [PIPE_FORMAT_R16_SINT] = 0x1115,
1286    [PIPE_FORMAT_R16_UINT] = 0x1115,
1287    [PIPE_FORMAT_R16_FLOAT] = 0x1115,
1288 
1289    [PIPE_FORMAT_R8_UNORM] = 0x0206,
1290    [PIPE_FORMAT_R8_SNORM] = 0x0206,
1291    [PIPE_FORMAT_R8_SINT] = 0x0206,
1292    [PIPE_FORMAT_R8_UINT] = 0x0206
1293 };
1294 
1295 /* NOTE: These are hardcoded offsets for the shader library.
1296  * TODO: Automate them.
1297  */
1298 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
1299 {
1300    [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
1301    [PIPE_FORMAT_R32G32B32A32_SINT]  = 0x218,
1302    [PIPE_FORMAT_R32G32B32A32_UINT]  = 0x218,
1303    [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
1304    [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
1305    [PIPE_FORMAT_R16G16B16A16_SINT]  = 0x330,
1306    [PIPE_FORMAT_R16G16B16A16_UINT]  = 0x388,
1307    [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
1308    [PIPE_FORMAT_R32G32_FLOAT]       = 0x428,
1309    [PIPE_FORMAT_R32G32_SINT]        = 0x468,
1310    [PIPE_FORMAT_R32G32_UINT]        = 0x468,
1311    [PIPE_FORMAT_R10G10B10A2_UNORM]  = 0x4a8,
1312    [PIPE_FORMAT_R10G10B10A2_UINT]   = 0x530,
1313    [PIPE_FORMAT_R8G8B8A8_UNORM]     = 0x588,
1314    [PIPE_FORMAT_R8G8B8A8_SNORM]     = 0x5f8,
1315    [PIPE_FORMAT_R8G8B8A8_SINT]      = 0x670,
1316    [PIPE_FORMAT_R8G8B8A8_UINT]      = 0x6c8,
1317    [PIPE_FORMAT_B5G6R5_UNORM]       = 0x718,
1318    [PIPE_FORMAT_B5G5R5X1_UNORM]     = 0x7a0,
1319    [PIPE_FORMAT_R16G16_UNORM]       = 0x828,
1320    [PIPE_FORMAT_R16G16_SNORM]       = 0x890,
1321    [PIPE_FORMAT_R16G16_SINT]        = 0x8f0,
1322    [PIPE_FORMAT_R16G16_UINT]        = 0x948,
1323    [PIPE_FORMAT_R16G16_FLOAT]       = 0x998,
1324    [PIPE_FORMAT_R32_FLOAT]          = 0x9e8,
1325    [PIPE_FORMAT_R32_SINT]           = 0xa30,
1326    [PIPE_FORMAT_R32_UINT]           = 0xa30,
1327    [PIPE_FORMAT_R8G8_UNORM]         = 0xa78,
1328    [PIPE_FORMAT_R8G8_SNORM]         = 0xae0,
1329    [PIPE_FORMAT_R8G8_UINT]          = 0xb48,
1330    [PIPE_FORMAT_R8G8_SINT]          = 0xb98,
1331    [PIPE_FORMAT_R16_UNORM]          = 0xbe8,
1332    [PIPE_FORMAT_R16_SNORM]          = 0xc48,
1333    [PIPE_FORMAT_R16_SINT]           = 0xca0,
1334    [PIPE_FORMAT_R16_UINT]           = 0xce8,
1335    [PIPE_FORMAT_R16_FLOAT]          = 0xd30,
1336    [PIPE_FORMAT_R8_UNORM]           = 0xd88,
1337    [PIPE_FORMAT_R8_SNORM]           = 0xde0,
1338    [PIPE_FORMAT_R8_SINT]            = 0xe38,
1339    [PIPE_FORMAT_R8_UINT]            = 0xe88,
1340    [PIPE_FORMAT_R11G11B10_FLOAT]    = 0xed0
1341 };
1342