1 /*
2  * Copyright 2008 Ben Skeggs
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_resource.h"
25 #include "nvc0/gm107_texture.xml.h"
26 #include "nvc0/nvc0_compute.xml.h"
27 #include "nv50/g80_texture.xml.h"
28 #include "nv50/g80_defs.xml.h"
29 
30 #include "util/format/u_format.h"
31 
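/* NVE4+ texture handles pack the TIC id into bits 0-19 and the TSC id into
 * bits 20-31 (see nve4_validate_tic/tsc below); these masks mark the
 * corresponding half of a handle as unbound. */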
32 #define NVE4_TIC_ENTRY_INVALID 0x000fffff
33 #define NVE4_TSC_ENTRY_INVALID 0xfff00000
34 
35 static inline uint32_t
36 nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
37 {
38    switch (swz) {
39    case PIPE_SWIZZLE_X: return fmt->tic.src_x;
40    case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
41    case PIPE_SWIZZLE_Z: return fmt->tic.src_z;
42    case PIPE_SWIZZLE_W: return fmt->tic.src_w;
43    case PIPE_SWIZZLE_1:
44       return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
45    case PIPE_SWIZZLE_0:
46    default:
47       return G80_TIC_SOURCE_ZERO;
48    }
49 }
50 
51 struct pipe_sampler_view *
52 nvc0_create_sampler_view(struct pipe_context *pipe,
53                          struct pipe_resource *res,
54                          const struct pipe_sampler_view *templ)
55 {
56    uint32_t flags = 0;
57 
58    if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
59       flags |= NV50_TEXVIEW_SCALED_COORDS;
60 
61    return nvc0_create_texture_view(pipe, res, templ, flags);
62 }
63 
64 static struct pipe_sampler_view *
65 gm107_create_texture_view(struct pipe_context *pipe,
66                           struct pipe_resource *texture,
67                           const struct pipe_sampler_view *templ,
68                           uint32_t flags)
69 {
70    const struct util_format_description *desc;
71    const struct nvc0_format *fmt;
72    uint64_t address;
73    uint32_t *tic;
74    uint32_t swz[4];
75    uint32_t width, height;
76    uint32_t depth;
77    struct nv50_tic_entry *view;
78    struct nv50_miptree *mt;
79    bool tex_int;
80 
81    view = MALLOC_STRUCT(nv50_tic_entry);
82    if (!view)
83       return NULL;
84    mt = nv50_miptree(texture);
85 
86    view->pipe = *templ;
87    view->pipe.reference.count = 1;
88    view->pipe.texture = NULL;
89    view->pipe.context = pipe;
90 
91    view->id = -1;
92    view->bindless = 0;
93 
94    pipe_resource_reference(&view->pipe.texture, texture);
95 
96    tic = &view->tic[0];
97 
98    desc = util_format_description(view->pipe.format);
99    tex_int = util_format_is_pure_integer(view->pipe.format);
100 
101    fmt = &nvc0_format_table[view->pipe.format];
102    swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
103    swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
104    swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
105    swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
106 
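   /* tic[0..7] forms the 8-dword GM107 texture image control (TIC) descriptor;
    * word 0 packs the component sizes, per-component data types and the
    * swizzle sources computed above. */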
107    tic[0]  = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
108    tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
109    tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
110    tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
111    tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
112    tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
113    tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
114    tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
115    tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;
116 
117    address = mt->base.address;
118 
119    tic[3]  = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
120    tic[4]  = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
121    tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;
122 
123    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
124       tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;
125 
126    if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
127       tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
128    else
129       tic[5] = 0;
130 
131    /* check for linear storage type */
132    if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
133       if (texture->target == PIPE_BUFFER) {
134          assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
135          width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;
136          address +=
137             view->pipe.u.buf.offset;
138          tic[2]  = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
139          tic[3] |= width >> 16;
140          tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
141          tic[4] |= width & 0xffff;
142       } else {
143          assert(!(mt->level[0].pitch & 0x1f));
144          /* must be 2D texture without mip maps */
145          tic[2]  = GM107_TIC2_2_HEADER_VERSION_PITCH;
146          tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
147          tic[3] |= mt->level[0].pitch >> 5;
148          tic[4] |= mt->base.base.width0 - 1;
149          tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
150          tic[5] |= mt->base.base.height0 - 1;
151       }
152       tic[1]  = address;
153       tic[2] |= address >> 32;
154       tic[6]  = 0;
155       tic[7]  = 0;
156       return &view->pipe;
157    }
158 
159    tic[2]  = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
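   /* tile_mode nibbles: bits 4-7 and 8-11 appear to hold the block-linear tile
    * shifts in Y and Z (cf. NVC0_TILE_SHIFT_Y/Z used for surfaces below). */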
160    tic[3] |=
161       ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
162       ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);
163 
164    depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
165 
166    if (mt->base.base.array_size > 1) {
167       /* there doesn't seem to be a base layer field in TIC */
168       address += view->pipe.u.tex.first_layer * mt->layer_stride;
169       depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
170    }
171    tic[1]  = address;
172    tic[2] |= address >> 32;
173 
174    switch (templ->target) {
175    case PIPE_TEXTURE_1D:
176       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
177       break;
178    case PIPE_TEXTURE_2D:
179       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
180       break;
181    case PIPE_TEXTURE_RECT:
182       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
183       break;
184    case PIPE_TEXTURE_3D:
185       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
186       break;
187    case PIPE_TEXTURE_CUBE:
188       depth /= 6;
189       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
190       break;
191    case PIPE_TEXTURE_1D_ARRAY:
192       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
193       break;
194    case PIPE_TEXTURE_2D_ARRAY:
195       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
196       break;
197    case PIPE_TEXTURE_CUBE_ARRAY:
198       depth /= 6;
199       tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
200       break;
201    default:
202       unreachable("unexpected/invalid texture target");
203    }
204 
205    tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
206              GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
207              GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
208              GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;
209 
210    if (flags & (NV50_TEXVIEW_ACCESS_RESOLVE | NV50_TEXVIEW_IMAGE_GM107)) {
211       width = mt->base.base.width0 << mt->ms_x;
212       height = mt->base.base.height0 << mt->ms_y;
213    } else {
214       width = mt->base.base.width0;
215       height = mt->base.base.height0;
216    }
217 
218    tic[4] |= width - 1;
219 
220    tic[5] |= (height - 1) & 0xffff;
221    tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
222    tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;
223 
224    /* sampling points: (?) */
225    if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
226       tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
227       tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
228    } else {
229       tic[6]  = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
230       tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
231    }
232 
233    tic[7]  = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
234    tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;
235 
236    return &view->pipe;
237 }
238 
239 struct pipe_sampler_view *
240 gm107_create_texture_view_from_image(struct pipe_context *pipe,
241                                      const struct pipe_image_view *view)
242 {
243    struct nv04_resource *res = nv04_resource(view->resource);
244    struct pipe_sampler_view templ = {};
245    enum pipe_texture_target target;
246    uint32_t flags = 0;
247 
248    if (!res)
249       return NULL;
250    target = res->base.target;
251 
252    if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
253       target = PIPE_TEXTURE_2D_ARRAY;
254 
255    templ.target = target;
256    templ.format = view->format;
257    templ.swizzle_r = PIPE_SWIZZLE_X;
258    templ.swizzle_g = PIPE_SWIZZLE_Y;
259    templ.swizzle_b = PIPE_SWIZZLE_Z;
260    templ.swizzle_a = PIPE_SWIZZLE_W;
261 
262    if (target == PIPE_BUFFER) {
263       templ.u.buf.offset = view->u.buf.offset;
264       templ.u.buf.size = view->u.buf.size;
265    } else {
266       templ.u.tex.first_layer = view->u.tex.first_layer;
267       templ.u.tex.last_layer = view->u.tex.last_layer;
268       templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
269    }
270 
271    flags = NV50_TEXVIEW_SCALED_COORDS | NV50_TEXVIEW_IMAGE_GM107;
272 
273    return nvc0_create_texture_view(pipe, &res->base, &templ, flags);
274 }
275 
276 static struct pipe_sampler_view *
277 gf100_create_texture_view(struct pipe_context *pipe,
278                           struct pipe_resource *texture,
279                           const struct pipe_sampler_view *templ,
280                           uint32_t flags)
281 {
282    const struct util_format_description *desc;
283    const struct nvc0_format *fmt;
284    uint64_t address;
285    uint32_t *tic;
286    uint32_t swz[4];
287    uint32_t width, height;
288    uint32_t depth;
289    uint32_t tex_fmt;
290    struct nv50_tic_entry *view;
291    struct nv50_miptree *mt;
292    bool tex_int;
293 
294    view = MALLOC_STRUCT(nv50_tic_entry);
295    if (!view)
296       return NULL;
297    mt = nv50_miptree(texture);
298 
299    view->pipe = *templ;
300    view->pipe.reference.count = 1;
301    view->pipe.texture = NULL;
302    view->pipe.context = pipe;
303 
304    view->id = -1;
305    view->bindless = 0;
306 
307    pipe_resource_reference(&view->pipe.texture, texture);
308 
309    tic = &view->tic[0];
310 
311    desc = util_format_description(view->pipe.format);
312 
313    fmt = &nvc0_format_table[view->pipe.format];
314 
315    tex_int = util_format_is_pure_integer(view->pipe.format);
316    tex_fmt = fmt->tic.format & 0x3f;
317 
318    swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
319    swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
320    swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
321    swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
322    tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
323             (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
324             (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
325             (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
326             (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
327             (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
328             (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
329             (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
330             (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
331             ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));
332 
333    address = mt->base.address;
334 
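   /* 0x10001000: unnamed default bits; presumably the G80 counterparts of the
    * LOD/aniso quality and sector promotion defaults that the GM107 path sets
    * explicitly. */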
335    tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;
336 
337    if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
338       tic[2] |= G80_TIC_2_SRGB_CONVERSION;
339 
340    if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
341       tic[2] |= G80_TIC_2_NORMALIZED_COORDS;
342 
343    /* check for linear storage type */
344    if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
345       if (texture->target == PIPE_BUFFER) {
346          assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
347          address +=
348             view->pipe.u.buf.offset;
349          tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
350          tic[3] = 0;
351          tic[4] = /* width */
352             view->pipe.u.buf.size / (desc->block.bits / 8);
353          tic[5] = 0;
354       } else {
355          /* must be 2D texture without mip maps */
356          tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
357          tic[3] = mt->level[0].pitch;
358          tic[4] = mt->base.base.width0;
359          tic[5] = (1 << 16) | mt->base.base.height0;
360       }
361       tic[6] =
362       tic[7] = 0;
363       tic[1] = address;
364       tic[2] |= address >> 32;
365       return &view->pipe;
366    }
367 
368    tic[2] |=
369       ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
370       ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
371 
372    depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
373 
374    if (mt->base.base.array_size > 1) {
375       /* there doesn't seem to be a base layer field in TIC */
376       address += view->pipe.u.tex.first_layer * mt->layer_stride;
377       depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
378    }
379    tic[1] = address;
380    tic[2] |= address >> 32;
381 
382    switch (templ->target) {
383    case PIPE_TEXTURE_1D:
384       tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
385       break;
386    case PIPE_TEXTURE_2D:
387       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
388       break;
389    case PIPE_TEXTURE_RECT:
390       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
391       break;
392    case PIPE_TEXTURE_3D:
393       tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
394       break;
395    case PIPE_TEXTURE_CUBE:
396       depth /= 6;
397       tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
398       break;
399    case PIPE_TEXTURE_1D_ARRAY:
400       tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
401       break;
402    case PIPE_TEXTURE_2D_ARRAY:
403       tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
404       break;
405    case PIPE_TEXTURE_CUBE_ARRAY:
406       depth /= 6;
407       tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
408       break;
409    default:
410       unreachable("unexpected/invalid texture target");
411    }
412 
413    tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
414 
415    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
416       width = mt->base.base.width0 << mt->ms_x;
417       height = mt->base.base.height0 << mt->ms_y;
418    } else {
419       width = mt->base.base.width0;
420       height = mt->base.base.height0;
421    }
422 
423    tic[4] = (1 << 31) | width;
424 
425    tic[5] = height & 0xffff;
426    tic[5] |= depth << 16;
427    tic[5] |= mt->base.base.last_level << 28;
428 
429    /* sampling points: (?) */
430    if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
431       tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
432    else
433       tic[6] = 0x03000000;
434 
435    tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
436    tic[7] |= mt->ms_mode << 12;
437 
438    return &view->pipe;
439 }
440 
441 struct pipe_sampler_view *
442 nvc0_create_texture_view(struct pipe_context *pipe,
443                          struct pipe_resource *texture,
444                          const struct pipe_sampler_view *templ,
445                          uint32_t flags)
446 {
447    if (nvc0_context(pipe)->screen->tic.maxwell)
448       return gm107_create_texture_view(pipe, texture, templ, flags);
449    return gf100_create_texture_view(pipe, texture, templ, flags);
450 }
451 
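/* A buffer resource's backing storage may have moved; refresh the address in a
 * buffer texture's TIC and return true if it was rewritten, so the caller
 * knows a TIC_FLUSH is required. */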
452 bool
453 nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
454                 struct nv04_resource *res)
455 {
456    uint64_t address = res->address;
457    if (res->base.target != PIPE_BUFFER)
458       return false;
459    address += tic->pipe.u.buf.offset;
460    if (tic->tic[1] == (uint32_t)address &&
461        (tic->tic[2] & 0xff) == address >> 32)
462       return false;
463 
464    tic->tic[1] = address;
465    tic->tic[2] &= 0xffffff00;
466    tic->tic[2] |= address >> 32;
467 
468    if (tic->id >= 0) {
469       nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
470                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
471                            tic->tic);
472       return true;
473    }
474 
475    return false;
476 }
477 
478 bool
479 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
480 {
481    uint32_t commands[32];
482    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
483    unsigned i;
484    unsigned n = 0;
485    bool need_flush = false;
486 
487    for (i = 0; i < nvc0->num_textures[s]; ++i) {
488       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
489       struct nv04_resource *res;
490       const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
491 
492       if (!tic) {
493          if (dirty)
494             commands[n++] = (i << 1) | 0;
495          continue;
496       }
497       res = nv04_resource(tic->pipe.texture);
498       need_flush |= nvc0_update_tic(nvc0, tic, res);
499 
500       if (tic->id < 0) {
501          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
502 
503          nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
504                               NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
505                               tic->tic);
506          need_flush = true;
507       } else
508       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
509          if (unlikely(s == 5))
510             BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
511          else
512             BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
513          PUSH_DATA (push, (tic->id << 4) | 1);
514          NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
515       }
516       nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
517 
518       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
519       res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
520 
521       if (!dirty)
522          continue;
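      /* BIND_TIC word: bit 0 = bind (0 unbinds), texture slot at bit 1, TIC id at bit 9 */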
523       commands[n++] = (tic->id << 9) | (i << 1) | 1;
524 
525       if (unlikely(s == 5))
526          BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
527       else
528          BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
529    }
530    for (; i < nvc0->state.num_textures[s]; ++i)
531       commands[n++] = (i << 1) | 0;
532 
533    nvc0->state.num_textures[s] = nvc0->num_textures[s];
534 
535    if (n) {
536       if (unlikely(s == 5))
537          BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
538       else
539          BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
540       PUSH_DATAp(push, commands, n);
541    }
542    nvc0->textures_dirty[s] = 0;
543 
544    return need_flush;
545 }
546 
547 static bool
548 nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
549 {
550    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
551    unsigned i;
552    bool need_flush = false;
553 
554    for (i = 0; i < nvc0->num_textures[s]; ++i) {
555       struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
556       struct nv04_resource *res;
557       const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));
558 
559       if (!tic) {
560          nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
561          continue;
562       }
563       res = nv04_resource(tic->pipe.texture);
564       need_flush |= nvc0_update_tic(nvc0, tic, res);
565 
566       if (tic->id < 0) {
567          tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
568 
569          nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
570                               NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
571                               tic->tic);
572          need_flush = true;
573       } else
574       if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
575          BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
576          PUSH_DATA (push, (tic->id << 4) | 1);
577       }
578       nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
579 
580       res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
581       res->status |=  NOUVEAU_BUFFER_STATUS_GPU_READING;
582 
583       nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
584       nvc0->tex_handles[s][i] |= tic->id;
585       if (dirty)
586          BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
587    }
588    for (; i < nvc0->state.num_textures[s]; ++i) {
589       nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
590       nvc0->textures_dirty[s] |= 1 << i;
591    }
592 
593    nvc0->state.num_textures[s] = nvc0->num_textures[s];
594 
595    return need_flush;
596 }
597 
598 void nvc0_validate_textures(struct nvc0_context *nvc0)
599 {
600    bool need_flush = false;
601    int i;
602 
603    for (i = 0; i < 5; i++) {
604       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
605          need_flush |= nve4_validate_tic(nvc0, i);
606       else
607          need_flush |= nvc0_validate_tic(nvc0, i);
608    }
609 
610    if (need_flush) {
611       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
612       PUSH_DATA (nvc0->base.pushbuf, 0);
613    }
614 
615    /* Invalidate all CP textures because they are aliased. */
616    for (int i = 0; i < nvc0->num_textures[5]; i++)
617       nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
618    nvc0->textures_dirty[5] = ~0;
619    nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
620 }
621 
622 bool
623 nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
624 {
625    uint32_t commands[16];
626    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
627    unsigned i;
628    unsigned n = 0;
629    bool need_flush = false;
630 
631    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
632       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
633 
634       if (!(nvc0->samplers_dirty[s] & (1 << i)))
635          continue;
636       if (!tsc) {
637          commands[n++] = (i << 4) | 0;
638          continue;
639       }
640       nvc0->seamless_cube_map = tsc->seamless_cube_map;
641       if (tsc->id < 0) {
642          tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
643 
644          nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
645                                65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
646                                32, tsc->tsc);
647          need_flush = true;
648       }
649       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
650 
651       commands[n++] = (tsc->id << 12) | (i << 4) | 1;
652    }
653    for (; i < nvc0->state.num_samplers[s]; ++i)
654       commands[n++] = (i << 4) | 0;
655 
656    nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
657 
658    // TXF, in unlinked tsc mode, will always use sampler 0. So we have to
659    // ensure that it remains bound. Its contents don't matter, all samplers we
660    // ever create have the SRGB_CONVERSION bit set, so as long as the first
661    // entry is initialized, we're good to go. This is the only bit that has
662    // any effect on what TXF does.
663    if ((nvc0->samplers_dirty[s] & 1) && !nvc0->samplers[s][0]) {
664       if (n == 0)
665          n = 1;
666       // We're guaranteed that the first command refers to the first slot, so
667       // we're not overwriting a valid entry.
668       commands[0] = (0 << 12) | (0 << 4) | 1;
669    }
670 
671    if (n) {
672       if (unlikely(s == 5))
673          BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
674       else
675          BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
676       PUSH_DATAp(push, commands, n);
677    }
678    nvc0->samplers_dirty[s] = 0;
679 
680    return need_flush;
681 }
682 
683 bool
684 nve4_validate_tsc(struct nvc0_context *nvc0, int s)
685 {
686    unsigned i;
687    bool need_flush = false;
688 
689    for (i = 0; i < nvc0->num_samplers[s]; ++i) {
690       struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
691 
692       if (!tsc) {
693          nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
694          continue;
695       }
696       if (tsc->id < 0) {
697          tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
698 
699          nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
700                                65536 + tsc->id * 32,
701                                NV_VRAM_DOMAIN(&nvc0->screen->base),
702                                32, tsc->tsc);
703          need_flush = true;
704       }
705       nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
706 
707       nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
708       nvc0->tex_handles[s][i] |= tsc->id << 20;
709    }
710    for (; i < nvc0->state.num_samplers[s]; ++i) {
711       nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
712       nvc0->samplers_dirty[s] |= 1 << i;
713    }
714 
715    nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
716 
717    return need_flush;
718 }
719 
720 void nvc0_validate_samplers(struct nvc0_context *nvc0)
721 {
722    bool need_flush = false;
723    int i;
724 
725    for (i = 0; i < 5; i++) {
726       if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
727          need_flush |= nve4_validate_tsc(nvc0, i);
728       else
729          need_flush |= nvc0_validate_tsc(nvc0, i);
730    }
731 
732    if (need_flush) {
733       BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
734       PUSH_DATA (nvc0->base.pushbuf, 0);
735    }
736 
737    /* Invalidate all CP samplers because they are aliased. */
738    nvc0->samplers_dirty[5] = ~0;
739    nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
740 }
741 
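/* Seed TSC entry 0 (the TSC table lives at offset 65536 of txc) so that TXF,
 * which always uses sampler 0 in unlinked-TSC mode, sees an initialized entry
 * with SRGB_CONVERSION set; see the comment in nvc0_validate_tsc above. */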
742 void
743 nvc0_upload_tsc0(struct nvc0_context *nvc0)
744 {
745    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
746    u32 data[8] = { G80_TSC_0_SRGB_CONVERSION };
747    nvc0->base.push_data(&nvc0->base, nvc0->screen->txc,
748                         65536 /*+ tsc->id * 32*/,
749                         NV_VRAM_DOMAIN(&nvc0->screen->base), 32, data);
750    BEGIN_NVC0(push, NVC0_3D(TSC_FLUSH), 1);
751    PUSH_DATA (push, 0);
752 }
753 
754 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
755  * At some point we might want to get a list of the combinations used by a
756  * shader and fill in those entries instead of having it extract the handles.
757  */
758 void
759 nve4_set_tex_handles(struct nvc0_context *nvc0)
760 {
761    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
762    struct nvc0_screen *screen = nvc0->screen;
763    unsigned s;
764 
765    if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
766       return;
767 
768    for (s = 0; s < 5; ++s) {
769       uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
770       if (!dirty)
771          continue;
772       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
773       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
774       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
775       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
776       do {
777          int i = ffs(dirty) - 1;
778          dirty &= ~(1 << i);
779 
780          BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
781          PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(i));
782          PUSH_DATA (push, nvc0->tex_handles[s][i]);
783       } while (dirty);
784 
785       nvc0->textures_dirty[s] = 0;
786       nvc0->samplers_dirty[s] = 0;
787    }
788 }
789 
790 static uint64_t
791 nve4_create_texture_handle(struct pipe_context *pipe,
792                            struct pipe_sampler_view *view,
793                            const struct pipe_sampler_state *sampler)
794 {
795    /* We have to create persistent handles that won't change for these objects.
796     * That means that we have to upload them into place and lock them so that
797     * they can't be kicked out later.
798     */
799    struct nvc0_context *nvc0 = nvc0_context(pipe);
800    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
801    struct nv50_tic_entry *tic = nv50_tic_entry(view);
802    struct nv50_tsc_entry *tsc = pipe->create_sampler_state(pipe, sampler);
803    struct pipe_sampler_view *v = NULL;
804 
805    tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
806    if (tsc->id < 0)
807       goto fail;
808 
809    if (tic->id < 0) {
810       tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
811       if (tic->id < 0)
812          goto fail;
813 
814       nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
815                             NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
816                             tic->tic);
817 
818       IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
819    }
820 
821    nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
822                          65536 + tsc->id * 32,
823                          NV_VRAM_DOMAIN(&nvc0->screen->base),
824                          32, tsc->tsc);
825 
826    IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
827 
828    // Add an extra reference to this sampler view effectively held by this
829    // texture handle. This is to deal with the sampler view being dereferenced
830    // before the handle is. However we need the view to still be live until the
831    // handle to it is deleted.
832    pipe_sampler_view_reference(&v, view);
833    p_atomic_inc(&tic->bindless);
834 
835    nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
836    nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
837 
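   /* Handle layout: bit 32 tags a valid bindless handle, bits 20-31 hold the
    * TSC id, bits 0-19 the TIC id (cf. NVE4_T*C_ENTRY_INVALID). */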
838    return 0x100000000ULL | (tsc->id << 20) | tic->id;
839 
840 fail:
841    pipe->delete_sampler_state(pipe, tsc);
842    return 0;
843 }
844 
845 static bool
846 view_bound(struct nvc0_context *nvc0, struct pipe_sampler_view *view) {
847    for (int s = 0; s < 6; s++) {
848       for (int i = 0; i < nvc0->num_textures[s]; i++)
849          if (nvc0->textures[s][i] == view)
850             return true;
851    }
852    return false;
853 }
854 
855 static void
856 nve4_delete_texture_handle(struct pipe_context *pipe, uint64_t handle)
857 {
858    struct nvc0_context *nvc0 = nvc0_context(pipe);
859    uint32_t tic = handle & NVE4_TIC_ENTRY_INVALID;
860    uint32_t tsc = (handle & NVE4_TSC_ENTRY_INVALID) >> 20;
861    struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];
862 
863    if (entry) {
864       struct pipe_sampler_view *view = &entry->pipe;
865       assert(entry->bindless);
866       p_atomic_dec(&entry->bindless);
867       if (!view_bound(nvc0, view))
868          nvc0_screen_tic_unlock(nvc0->screen, entry);
869       pipe_sampler_view_reference(&view, NULL);
870    }
871 
872    pipe->delete_sampler_state(pipe, nvc0->screen->tsc.entries[tsc]);
873 }
874 
875 static void
876 nve4_make_texture_handle_resident(struct pipe_context *pipe,
877                                   uint64_t handle, bool resident)
878 {
879    struct nvc0_context *nvc0 = nvc0_context(pipe);
880    if (resident) {
881       struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
882       struct nv50_tic_entry *tic =
883          nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
884       assert(tic);
885       assert(tic->bindless);
886 
887       res->handle = handle;
888       res->buf = nv04_resource(tic->pipe.texture);
889       res->flags = NOUVEAU_BO_RD;
890       list_add(&res->list, &nvc0->tex_head);
891    } else {
892       list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->tex_head, list) {
893          if (pos->handle == handle) {
894             list_del(&pos->list);
895             free(pos);
896             break;
897          }
898       }
899    }
900 }
901 
902 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
903 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
904 static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
905 
906 static void
907 nvc0_get_surface_dims(const struct pipe_image_view *view,
908                       int *width, int *height, int *depth)
909 {
910    struct nv04_resource *res = nv04_resource(view->resource);
911    int level;
912 
913    *width = *height = *depth = 1;
914    if (res->base.target == PIPE_BUFFER) {
915       *width = view->u.buf.size / util_format_get_blocksize(view->format);
916       return;
917    }
918 
919    level = view->u.tex.level;
920    *width = u_minify(view->resource->width0, level);
921    *height = u_minify(view->resource->height0, level);
922    *depth = u_minify(view->resource->depth0, level);
923 
924    switch (res->base.target) {
925    case PIPE_TEXTURE_1D_ARRAY:
926    case PIPE_TEXTURE_2D_ARRAY:
927    case PIPE_TEXTURE_CUBE:
928    case PIPE_TEXTURE_CUBE_ARRAY:
929       *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
930       break;
931    case PIPE_TEXTURE_1D:
932    case PIPE_TEXTURE_2D:
933    case PIPE_TEXTURE_RECT:
934    case PIPE_TEXTURE_3D:
935       break;
936    default:
937       assert(!"unexpected texture target");
938       break;
939    }
940 }
941 
942 void
943 nvc0_mark_image_range_valid(const struct pipe_image_view *view)
944 {
945    struct nv04_resource *res = (struct nv04_resource *)view->resource;
946 
947    assert(view->resource->target == PIPE_BUFFER);
948 
949    util_range_add(&res->base, &res->valid_buffer_range,
950                   view->u.buf.offset,
951                   view->u.buf.offset + view->u.buf.size);
952 }
953 
954 void
955 nve4_set_surface_info(struct nouveau_pushbuf *push,
956                       const struct pipe_image_view *view,
957                       struct nvc0_context *nvc0)
958 {
959    struct nvc0_screen *screen = nvc0->screen;
960    struct nv04_resource *res;
961    uint64_t address;
962    uint32_t *const info = push->cur;
963    int width, height, depth;
964    uint8_t log2cpp;
965 
966    if (view && !nve4_su_format_map[view->format])
967       NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
968 
969    push->cur += 16;
970 
971    if (!view || !nve4_su_format_map[view->format]) {
972       memset(info, 0, 16 * sizeof(*info));
973 
974       info[0] = 0xbadf0000;
975       info[1] = 0x80004000;
976       info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
977          screen->lib_code->start;
978       return;
979    }
980    res = nv04_resource(view->resource);
981 
982    address = res->address;
983 
984    /* get surface dimensions based on the target. */
985    nvc0_get_surface_dims(view, &width, &height, &depth);
986 
987    info[8] = width;
988    info[9] = height;
989    info[10] = depth;
990    switch (res->base.target) {
991    case PIPE_TEXTURE_1D_ARRAY:
992       info[11] = 1;
993       break;
994    case PIPE_TEXTURE_2D:
995    case PIPE_TEXTURE_RECT:
996       info[11] = 2;
997       break;
998    case PIPE_TEXTURE_3D:
999       info[11] = 3;
1000       break;
1001    case PIPE_TEXTURE_2D_ARRAY:
1002    case PIPE_TEXTURE_CUBE:
1003    case PIPE_TEXTURE_CUBE_ARRAY:
1004       info[11] = 4;
1005       break;
1006    default:
1007       info[11] = 0;
1008       break;
1009    }
1010    log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;
1011 
1012    /* Store the block size (i.e. the number of bytes per pixel) so that a
1013     * mismatched format can be detected. */
1014    info[12] = util_format_get_blocksize(view->format);
1015 
1016    /* limit in bytes for raw access */
1017    info[13] = (0x06 << 22) | ((width << log2cpp) - 1);
1018 
1019    info[1] = nve4_su_format_map[view->format];
1020 
1021 #if 0
1022    switch (util_format_get_blocksizebits(view->format)) {
1023    case  16: info[1] |= 1 << 16; break;
1024    case  32: info[1] |= 2 << 16; break;
1025    case  64: info[1] |= 3 << 16; break;
1026    case 128: info[1] |= 4 << 16; break;
1027    default:
1028       break;
1029    }
1030 #else
1031    info[1] |= log2cpp << 16;
1032    info[1] |=  0x4000;
1033    info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
1034 #endif
1035 
1036    if (res->base.target == PIPE_BUFFER) {
1037       address += view->u.buf.offset;
1038 
1039       info[0]  = address >> 8;
1040       info[2]  = width - 1;
1041       info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
1042       info[3]  = 0;
1043       info[4]  = 0;
1044       info[5]  = 0;
1045       info[6]  = 0;
1046       info[7]  = 0;
1047       info[14] = 0;
1048       info[15] = 0;
1049    } else {
1050       struct nv50_miptree *mt = nv50_miptree(&res->base);
1051       struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
1052       unsigned z = view->u.tex.first_layer;
1053 
1054       if (!mt->layout_3d) {
1055          address += mt->layer_stride * z;
1056          z = 0;
1057       }
1058 
1059       address += lvl->offset;
1060 
1061       info[0]  = address >> 8;
1062       info[2]  = (width << mt->ms_x) - 1;
1063       /* NOTE: this is really important: */
1064       info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
1065       info[3]  = (0x88 << 24) | (lvl->pitch / 64);
1066       info[4]  = (height << mt->ms_y) - 1;
1067       info[4] |= (lvl->tile_mode & 0x0f0) << 25;
1068       info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
1069       info[5]  = mt->layer_stride >> 8;
1070       info[6]  = depth - 1;
1071       info[6] |= (lvl->tile_mode & 0xf00) << 21;
1072       info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
1073       info[7]  = mt->layout_3d ? 1 : 0;
1074       info[7] |= z << 16;
1075       info[14] = mt->ms_x;
1076       info[15] = mt->ms_y;
1077    }
1078 }
1079 
1080 static inline void
1081 nvc0_set_surface_info(struct nouveau_pushbuf *push,
1082                       const struct pipe_image_view *view, uint64_t address,
1083                       int width, int height, int depth)
1084 {
1085    struct nv04_resource *res;
1086    uint32_t *const info = push->cur;
1087 
1088    push->cur += 16;
1089 
1090    /* Make sure to always initialize the surface information area because it's
1091     * used to check if the given image is bound or not. */
1092    memset(info, 0, 16 * sizeof(*info));
1093 
1094    if (!view || !view->resource)
1095       return;
1096    res = nv04_resource(view->resource);
1097 
1098    /* Stick the image dimensions for the imageSize() builtin. */
1099    info[8] = width;
1100    info[9] = height;
1101    info[10] = depth;
1102 
1103    /* Store log2 of the block size (bytes per pixel), used to compute the pixel
1104     * offset and to detect a mismatched format. */
1105    info[12] = ffs(util_format_get_blocksize(view->format)) - 1;
1106 
1107    if (res->base.target == PIPE_BUFFER) {
1108       info[0]  = address >> 8;
1109       info[2]  = width;
1110    } else {
1111       struct nv50_miptree *mt = nv50_miptree(&res->base);
1112       struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
1113       unsigned z = mt->layout_3d ? view->u.tex.first_layer : 0;
1114       unsigned nby = align(util_format_get_nblocksy(view->format, height),
1115                            NVC0_TILE_SIZE_Y(lvl->tile_mode));
1116 
1117       /* NOTE: this does not precisely match nve4; the values are made to be
1118        * easier for the shader to consume.
1119        */
1120       info[0]  = address >> 8;
1121       info[2]  = (NVC0_TILE_SHIFT_X(lvl->tile_mode) - info[12]) << 24;
1122       info[4]  = NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 24 | nby;
1123       info[5]  = mt->layer_stride >> 8;
1124       info[6]  = NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 24;
1125       info[7]  = z;
1126       info[14] = mt->ms_x;
1127       info[15] = mt->ms_y;
1128    }
1129 }
1130 
1131 void
1132 nvc0_validate_suf(struct nvc0_context *nvc0, int s)
1133 {
1134    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1135    struct nvc0_screen *screen = nvc0->screen;
1136 
1137    for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
1138       struct pipe_image_view *view = &nvc0->images[s][i];
1139       int width, height, depth;
1140       uint64_t address = 0;
1141 
1142       if (s == 5)
1143          BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
1144       else
1145          BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);
1146 
1147       if (view->resource) {
1148          struct nv04_resource *res = nv04_resource(view->resource);
1149          unsigned rt = nvc0_format_table[view->format].rt;
1150 
1151          if (util_format_is_depth_or_stencil(view->format))
1152             rt = rt << 12;
1153          else
1154             rt = (rt << 4) | (0x14 << 12);
1155 
1156          /* get surface dimensions based on the target. */
1157          nvc0_get_surface_dims(view, &width, &height, &depth);
1158 
1159          address = res->address;
1160          if (res->base.target == PIPE_BUFFER) {
1161             unsigned blocksize = util_format_get_blocksize(view->format);
1162 
1163             address += view->u.buf.offset;
1164             assert(!(address & 0xff));
1165 
1166             if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1167                nvc0_mark_image_range_valid(view);
1168 
1169             PUSH_DATAh(push, address);
1170             PUSH_DATA (push, address);
1171             PUSH_DATA (push, align(width * blocksize, 0x100));
1172             PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
1173             PUSH_DATA (push, rt);
1174             PUSH_DATA (push, 0);
1175          } else {
1176             struct nv50_miptree *mt = nv50_miptree(view->resource);
1177             struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
1178             unsigned adjusted_width = width, adjusted_height = height;
1179 
1180             if (mt->layout_3d) {
1181                // We have to adjust the size of the 3d surface to be
1182                // accessible within 2d limits. The size of each z tile goes
1183                // into the x direction, while the number of z tiles goes into
1184                // the y direction.
1185                const unsigned nbx = util_format_get_nblocksx(view->format, width);
1186                const unsigned nby = util_format_get_nblocksy(view->format, height);
1187                const unsigned tsx = NVC0_TILE_SIZE_X(lvl->tile_mode);
1188                const unsigned tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
1189                const unsigned tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);
1190 
1191                adjusted_width = align(nbx, tsx / util_format_get_blocksize(view->format)) * tsz;
1192                adjusted_height = align(nby, tsy) * align(depth, tsz) >> NVC0_TILE_SHIFT_Z(lvl->tile_mode);
1193             } else {
1194                const unsigned z = view->u.tex.first_layer;
1195                address += mt->layer_stride * z;
1196             }
1197             address += lvl->offset;
1198 
1199             PUSH_DATAh(push, address);
1200             PUSH_DATA (push, address);
1201             PUSH_DATA (push, adjusted_width << mt->ms_x);
1202             PUSH_DATA (push, adjusted_height << mt->ms_y);
1203             PUSH_DATA (push, rt);
1204             PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
1205          }
1206 
1207          if (s == 5)
1208             BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
1209          else
1210             BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1211       } else {
1212          PUSH_DATA(push, 0);
1213          PUSH_DATA(push, 0);
1214          PUSH_DATA(push, 0);
1215          PUSH_DATA(push, 0);
1216          PUSH_DATA(push, 0x14000);
1217          PUSH_DATA(push, 0);
1218       }
1219 
1220       /* stick surface information into the driver constant buffer */
1221       if (s == 5)
1222          BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
1223       else
1224          BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1225       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1226       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1227       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1228       if (s == 5)
1229          BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
1230       else
1231          BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1232       PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1233 
1234       nvc0_set_surface_info(push, view, address, width, height, depth);
1235    }
1236 }
1237 
1238 static inline void
1239 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
1240 {
1241    nvc0_validate_suf(nvc0, 4);
1242 
1243    /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
1244    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
1245    nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
1246    nvc0->images_dirty[5] |= nvc0->images_valid[5];
1247 }
1248 
1249 static void
1250 gm107_validate_surfaces(struct nvc0_context *nvc0,
1251                         struct pipe_image_view *view, int stage, int slot)
1252 {
1253    struct nv04_resource *res = nv04_resource(view->resource);
1254    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1255    struct nvc0_screen *screen = nvc0->screen;
1256    struct nv50_tic_entry *tic;
1257 
1258    tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);
1259 
1260    res = nv04_resource(tic->pipe.texture);
1261    nvc0_update_tic(nvc0, tic, res);
1262 
1263    if (tic->id < 0) {
1264       tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
1265 
1266       /* upload the texture view */
1267       nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
1268                             NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);
1269 
1270       BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
1271       PUSH_DATA (push, 0);
1272    } else
1273    if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
1274       BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
1275       PUSH_DATA (push, (tic->id << 4) | 1);
1276    }
1277    nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
1278 
1279    res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
1280    res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
1281 
1282    BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);
1283 
1284    /* upload the texture handle */
1285    BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1286    PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1287    PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
1288    PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
1289    BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
1290    PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
1291    PUSH_DATA (push, tic->id);
1292 }
1293 
1294 static inline void
1295 nve4_update_surface_bindings(struct nvc0_context *nvc0)
1296 {
1297    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1298    struct nvc0_screen *screen = nvc0->screen;
1299    int i, j, s;
1300 
1301    for (s = 0; s < 5; s++) {
1302       if (!nvc0->images_dirty[s])
1303          continue;
1304 
1305       for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
1306          struct pipe_image_view *view = &nvc0->images[s][i];
1307 
1308          BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1309          PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1310          PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1311          PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1312          BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1313          PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));
1314 
1315          if (view->resource) {
1316             struct nv04_resource *res = nv04_resource(view->resource);
1317 
1318             if (res->base.target == PIPE_BUFFER) {
1319                if (view->access & PIPE_IMAGE_ACCESS_WRITE)
1320                   nvc0_mark_image_range_valid(view);
1321             }
1322 
1323             nve4_set_surface_info(push, view, nvc0);
1324             BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
1325 
1326             if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
1327                gm107_validate_surfaces(nvc0, view, s, i);
1328          } else {
1329             for (j = 0; j < 16; j++)
1330                PUSH_DATA(push, 0);
1331          }
1332       }
1333    }
1334 }
1335 
1336 void
1337 nvc0_validate_surfaces(struct nvc0_context *nvc0)
1338 {
1339    if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
1340       nve4_update_surface_bindings(nvc0);
1341    } else {
1342       nvc0_update_surface_bindings(nvc0);
1343    }
1344 }
1345 
1346 static uint64_t
1347 nve4_create_image_handle(struct pipe_context *pipe,
1348                          const struct pipe_image_view *view)
1349 {
1350    struct nvc0_context *nvc0 = nvc0_context(pipe);
1351    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1352    struct nvc0_screen *screen = nvc0->screen;
1353    int i = screen->img.next, s;
1354 
1355    while (screen->img.entries[i]) {
1356       i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
1357       if (i == screen->img.next)
1358          return 0;
1359    }
1360 
1361    screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
1362    screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));
1363    *screen->img.entries[i] = *view;
1364 
1365    for (s = 0; s < 6; s++) {
1366       BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1367       PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1368       PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1369       PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1370       BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1371       PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));
1372       nve4_set_surface_info(push, view, nvc0);
1373    }
1374 
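   /* The image handle is simply the entry's index in screen->img.entries,
    * tagged with bit 32 like the NVE4 texture handles. */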
1375    return 0x100000000ULL | i;
1376 }
1377 
1378 static void
1379 nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
1380 {
1381    struct nvc0_context *nvc0 = nvc0_context(pipe);
1382    struct nvc0_screen *screen = nvc0->screen;
1383    int i = handle & (NVE4_IMG_MAX_HANDLES - 1);
1384 
1385    free(screen->img.entries[i]);
1386    screen->img.entries[i] = NULL;
1387 }
1388 
1389 static void
1390 nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
1391                                 unsigned access, bool resident)
1392 {
1393    struct nvc0_context *nvc0 = nvc0_context(pipe);
1394    struct nvc0_screen *screen = nvc0->screen;
1395 
1396    if (resident) {
1397       struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
1398       struct pipe_image_view *view =
1399          screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];
1400       assert(view);
1401 
1402       if (view->resource->target == PIPE_BUFFER &&
1403           access & PIPE_IMAGE_ACCESS_WRITE)
1404          nvc0_mark_image_range_valid(view);
1405       res->handle = handle;
1406       res->buf = nv04_resource(view->resource);
1407       res->flags = (access & 3) << 8;
1408       list_add(&res->list, &nvc0->img_head);
1409    } else {
1410       list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
1411          if (pos->handle == handle) {
1412             list_del(&pos->list);
1413             free(pos);
1414             break;
1415          }
1416       }
1417    }
1418 }
1419 
1420 static uint64_t
1421 gm107_create_image_handle(struct pipe_context *pipe,
1422                           const struct pipe_image_view *view)
1423 {
1424    /* GM107+ use TIC handles to reference images. As such, image handles are
1425     * just the TIC id.
1426     */
1427    struct nvc0_context *nvc0 = nvc0_context(pipe);
1428    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1429    struct pipe_sampler_view *sview =
1430       gm107_create_texture_view_from_image(pipe, view);
1431    struct nv50_tic_entry *tic = nv50_tic_entry(sview);
1432 
1433    if (tic == NULL)
1434       goto fail;
1435 
1436    tic->bindless = 1;
1437    tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
1438    if (tic->id < 0)
1439       goto fail;
1440 
1441    nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
1442                          NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
1443                          tic->tic);
1444 
1445    IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
1446 
1447    nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
1448 
1449    // Compute handle. This will include the TIC as well as some additional
1450    // info regarding the bound 3d surface layer, if applicable.
1451    uint64_t handle = 0x100000000ULL | tic->id;
1452    struct nv04_resource *res = nv04_resource(view->resource);
1453    if (res->base.target == PIPE_TEXTURE_3D) {
1454       handle |= 1 << 11;
1455       handle |= view->u.tex.first_layer << (11 + 16);
1456    }
1457    return handle;
1458 
1459 fail:
1460    FREE(tic);
1461    return 0;
1462 }
1463 
1464 static void
1465 gm107_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
1466 {
1467    struct nvc0_context *nvc0 = nvc0_context(pipe);
1468    int tic = handle & NVE4_TIC_ENTRY_INVALID;
1469    struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];
1470    struct pipe_sampler_view *view = &entry->pipe;
1471    assert(entry->bindless == 1);
1472    assert(!view_bound(nvc0, view));
1473    entry->bindless = 0;
1474    nvc0_screen_tic_unlock(nvc0->screen, entry);
1475    pipe_sampler_view_reference(&view, NULL);
1476 }
1477 
1478 static void
gm107_make_image_handle_resident(struct pipe_context * pipe,uint64_t handle,unsigned access,bool resident)1479 gm107_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
1480                                  unsigned access, bool resident)
1481 {
1482    struct nvc0_context *nvc0 = nvc0_context(pipe);
1483 
1484    if (resident) {
1485       struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
1486       struct nv50_tic_entry *tic =
1487          nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
1488       assert(tic);
1489       assert(tic->bindless);
1490 
1491       res->handle = handle;
1492       res->buf = nv04_resource(tic->pipe.texture);
1493       res->flags = (access & 3) << 8;
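      /* Writable buffer image: extend the buffer's valid range over the bound
       * window, matching the nvc0_mark_image_range_valid() call in the NVE4
       * variant above. */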
      if (res->buf->base.target == PIPE_BUFFER &&
          access & PIPE_IMAGE_ACCESS_WRITE)
         util_range_add(&res->buf->base, &res->buf->valid_buffer_range,
                        tic->pipe.u.buf.offset,
                        tic->pipe.u.buf.offset + tic->pipe.u.buf.size);
      list_add(&res->list, &nvc0->img_head);
   } else {
      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
         if (pos->handle == handle) {
            list_del(&pos->list);
            free(pos);
            break;
         }
      }
   }
}

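/* Texture handles always use the NVE4 paths; image handles go through the
 * driver-managed img.entries table on pre-GM107 chips and switch to the
 * TIC-based handles on GM107 and later.
 */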
void
nvc0_init_bindless_functions(struct pipe_context *pipe)
{
   pipe->create_texture_handle = nve4_create_texture_handle;
   pipe->delete_texture_handle = nve4_delete_texture_handle;
   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;

   if (nvc0_context(pipe)->screen->base.class_3d < GM107_3D_CLASS) {
      pipe->create_image_handle = nve4_create_image_handle;
      pipe->delete_image_handle = nve4_delete_image_handle;
      pipe->make_image_handle_resident = nve4_make_image_handle_resident;
   } else {
      pipe->create_image_handle = gm107_create_image_handle;
      pipe->delete_image_handle = gm107_delete_image_handle;
      pipe->make_image_handle_resident = gm107_make_image_handle_resident;
   }
}

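/* Gallium format -> GK104_IMAGE_FORMAT_* mapping; formats without an explicit
 * entry below default to 0.
 */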
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
   [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
   [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
   [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
   [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
   [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
   [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
   [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
   [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
   [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
   [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
   [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
   [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
   [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
   [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
   [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
   [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
   [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
   [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
   [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
   [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
   [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
   [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
   [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
   [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
   [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
   [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
   [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
   [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
   [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
   [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
   [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
   [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
};

/* Auxiliary format description values for surface instructions.
 * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
 */
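/* Worked example of the encoding above: PIPE_FORMAT_R32G32B32A32_* is 16 bytes
 * per pixel, so log2 = 4 and its entry is 0x4842 = (4 << 12) | (8 << 8) | 0x42;
 * likewise all the 4-byte formats (RGBA8, RGB10_A2, R11G11B10, RG16, R32)
 * start with 0x2---.
 */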
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,

   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,

   [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
   [PIPE_FORMAT_R32G32_SINT] = 0x3433,
   [PIPE_FORMAT_R32G32_UINT] = 0x3433,

   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
   [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,

   [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SINT] = 0x2524,
   [PIPE_FORMAT_R16G16_UINT] = 0x2524,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,

   [PIPE_FORMAT_R32_SINT] = 0x2024,
   [PIPE_FORMAT_R32_UINT] = 0x2024,
   [PIPE_FORMAT_R32_FLOAT] = 0x2024,

   [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SINT] = 0x1615,
   [PIPE_FORMAT_R8G8_UINT] = 0x1615,

   [PIPE_FORMAT_R16_UNORM] = 0x1115,
   [PIPE_FORMAT_R16_SNORM] = 0x1115,
   [PIPE_FORMAT_R16_SINT] = 0x1115,
   [PIPE_FORMAT_R16_UINT] = 0x1115,
   [PIPE_FORMAT_R16_FLOAT] = 0x1115,

   [PIPE_FORMAT_R8_UNORM] = 0x0206,
   [PIPE_FORMAT_R8_SNORM] = 0x0206,
   [PIPE_FORMAT_R8_SINT] = 0x0206,
   [PIPE_FORMAT_R8_UINT] = 0x0206
};

/* NOTE: These are hardcoded offsets for the shader library.
 * TODO: Automate them.
 */
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_SINT]  = 0x218,
   [PIPE_FORMAT_R32G32B32A32_UINT]  = 0x218,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
   [PIPE_FORMAT_R16G16B16A16_SINT]  = 0x330,
   [PIPE_FORMAT_R16G16B16A16_UINT]  = 0x388,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
   [PIPE_FORMAT_R32G32_FLOAT]       = 0x428,
   [PIPE_FORMAT_R32G32_SINT]        = 0x468,
   [PIPE_FORMAT_R32G32_UINT]        = 0x468,
   [PIPE_FORMAT_R10G10B10A2_UNORM]  = 0x4a8,
   [PIPE_FORMAT_R10G10B10A2_UINT]   = 0x530,
   [PIPE_FORMAT_R8G8B8A8_UNORM]     = 0x588,
   [PIPE_FORMAT_R8G8B8A8_SNORM]     = 0x5f8,
   [PIPE_FORMAT_R8G8B8A8_SINT]      = 0x670,
   [PIPE_FORMAT_R8G8B8A8_UINT]      = 0x6c8,
   [PIPE_FORMAT_B5G6R5_UNORM]       = 0x718,
   [PIPE_FORMAT_B5G5R5X1_UNORM]     = 0x7a0,
   [PIPE_FORMAT_R16G16_UNORM]       = 0x828,
   [PIPE_FORMAT_R16G16_SNORM]       = 0x890,
   [PIPE_FORMAT_R16G16_SINT]        = 0x8f0,
   [PIPE_FORMAT_R16G16_UINT]        = 0x948,
   [PIPE_FORMAT_R16G16_FLOAT]       = 0x998,
   [PIPE_FORMAT_R32_FLOAT]          = 0x9e8,
   [PIPE_FORMAT_R32_SINT]           = 0xa30,
   [PIPE_FORMAT_R32_UINT]           = 0xa30,
   [PIPE_FORMAT_R8G8_UNORM]         = 0xa78,
   [PIPE_FORMAT_R8G8_SNORM]         = 0xae0,
   [PIPE_FORMAT_R8G8_UINT]          = 0xb48,
   [PIPE_FORMAT_R8G8_SINT]          = 0xb98,
   [PIPE_FORMAT_R16_UNORM]          = 0xbe8,
   [PIPE_FORMAT_R16_SNORM]          = 0xc48,
   [PIPE_FORMAT_R16_SINT]           = 0xca0,
   [PIPE_FORMAT_R16_UINT]           = 0xce8,
   [PIPE_FORMAT_R16_FLOAT]          = 0xd30,
   [PIPE_FORMAT_R8_UNORM]           = 0xd88,
   [PIPE_FORMAT_R8_SNORM]           = 0xde0,
   [PIPE_FORMAT_R8_SINT]            = 0xe38,
   [PIPE_FORMAT_R8_UINT]            = 0xe88,
   [PIPE_FORMAT_R11G11B10_FLOAT]    = 0xed0
};